blob: 0a961e83d5af76d0adfba239f8e8a0a88975e108 [file] [log] [blame]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
6
7define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8; X32-LABEL: test_mm_add_epi8:
9; X32: # BB#0:
10; X32-NEXT: paddb %xmm1, %xmm0
11; X32-NEXT: retl
12;
13; X64-LABEL: test_mm_add_epi8:
14; X64: # BB#0:
15; X64-NEXT: paddb %xmm1, %xmm0
16; X64-NEXT: retq
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
21 ret <2 x i64> %bc
22}
23
24define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25; X32-LABEL: test_mm_add_epi16:
26; X32: # BB#0:
27; X32-NEXT: paddw %xmm1, %xmm0
28; X32-NEXT: retl
29;
30; X64-LABEL: test_mm_add_epi16:
31; X64: # BB#0:
32; X64-NEXT: paddw %xmm1, %xmm0
33; X64-NEXT: retq
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
38 ret <2 x i64> %bc
39}
40
41define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42; X32-LABEL: test_mm_add_epi32:
43; X32: # BB#0:
44; X32-NEXT: paddd %xmm1, %xmm0
45; X32-NEXT: retl
46;
47; X64-LABEL: test_mm_add_epi32:
48; X64: # BB#0:
49; X64-NEXT: paddd %xmm1, %xmm0
50; X64-NEXT: retq
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
55 ret <2 x i64> %bc
56}
57
58define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59; X32-LABEL: test_mm_add_epi64:
60; X32: # BB#0:
61; X32-NEXT: paddq %xmm1, %xmm0
62; X32-NEXT: retl
63;
64; X64-LABEL: test_mm_add_epi64:
65; X64: # BB#0:
66; X64-NEXT: paddq %xmm1, %xmm0
67; X64-NEXT: retq
68 %res = add <2 x i64> %a0, %a1
69 ret <2 x i64> %res
70}
71
72define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73; X32-LABEL: test_mm_add_pd:
74; X32: # BB#0:
75; X32-NEXT: addpd %xmm1, %xmm0
76; X32-NEXT: retl
77;
78; X64-LABEL: test_mm_add_pd:
79; X64: # BB#0:
80; X64-NEXT: addpd %xmm1, %xmm0
81; X64-NEXT: retq
82 %res = fadd <2 x double> %a0, %a1
83 ret <2 x double> %res
84}
85
86define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87; X32-LABEL: test_mm_add_sd:
88; X32: # BB#0:
89; X32-NEXT: addsd %xmm1, %xmm0
90; X32-NEXT: retl
91;
92; X64-LABEL: test_mm_add_sd:
93; X64: # BB#0:
94; X64-NEXT: addsd %xmm1, %xmm0
95; X64-NEXT: retq
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
101}
102
103define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104; X32-LABEL: test_mm_adds_epi8:
105; X32: # BB#0:
106; X32-NEXT: paddsb %xmm1, %xmm0
107; X32-NEXT: retl
108;
109; X64-LABEL: test_mm_adds_epi8:
110; X64: # BB#0:
111; X64-NEXT: paddsb %xmm1, %xmm0
112; X64-NEXT: retq
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
117 ret <2 x i64> %bc
118}
119declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
120
121define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122; X32-LABEL: test_mm_adds_epi16:
123; X32: # BB#0:
124; X32-NEXT: paddsw %xmm1, %xmm0
125; X32-NEXT: retl
126;
127; X64-LABEL: test_mm_adds_epi16:
128; X64: # BB#0:
129; X64-NEXT: paddsw %xmm1, %xmm0
130; X64-NEXT: retq
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
135 ret <2 x i64> %bc
136}
137declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
138
139define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140; X32-LABEL: test_mm_adds_epu8:
141; X32: # BB#0:
142; X32-NEXT: paddusb %xmm1, %xmm0
143; X32-NEXT: retl
144;
145; X64-LABEL: test_mm_adds_epu8:
146; X64: # BB#0:
147; X64-NEXT: paddusb %xmm1, %xmm0
148; X64-NEXT: retq
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
153 ret <2 x i64> %bc
154}
155declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
156
157define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158; X32-LABEL: test_mm_adds_epu16:
159; X32: # BB#0:
160; X32-NEXT: paddusw %xmm1, %xmm0
161; X32-NEXT: retl
162;
163; X64-LABEL: test_mm_adds_epu16:
164; X64: # BB#0:
165; X64-NEXT: paddusw %xmm1, %xmm0
166; X64-NEXT: retq
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
171 ret <2 x i64> %bc
172}
173declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
175define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176; X32-LABEL: test_mm_and_pd:
177; X32: # BB#0:
178; X32-NEXT: andps %xmm1, %xmm0
179; X32-NEXT: retl
180;
181; X64-LABEL: test_mm_and_pd:
182; X64: # BB#0:
183; X64-NEXT: andps %xmm1, %xmm0
184; X64-NEXT: retq
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
189 ret <2 x double> %bc
190}
191
192define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193; X32-LABEL: test_mm_and_si128:
194; X32: # BB#0:
195; X32-NEXT: andps %xmm1, %xmm0
196; X32-NEXT: retl
197;
198; X64-LABEL: test_mm_and_si128:
199; X64: # BB#0:
200; X64-NEXT: andps %xmm1, %xmm0
201; X64-NEXT: retq
202 %res = and <2 x i64> %a0, %a1
203 ret <2 x i64> %res
204}
205
206define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207; X32-LABEL: test_mm_andnot_pd:
208; X32: # BB#0:
209; X32-NEXT: andnps %xmm1, %xmm0
210; X32-NEXT: retl
211;
212; X64-LABEL: test_mm_andnot_pd:
213; X64: # BB#0:
214; X64-NEXT: andnps %xmm1, %xmm0
215; X64-NEXT: retq
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
221 ret <2 x double> %bc
222}
223
224define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225; X32-LABEL: test_mm_andnot_si128:
226; X32: # BB#0:
227; X32-NEXT: pcmpeqd %xmm2, %xmm2
228; X32-NEXT: pxor %xmm2, %xmm0
229; X32-NEXT: pand %xmm1, %xmm0
230; X32-NEXT: retl
231;
232; X64-LABEL: test_mm_andnot_si128:
233; X64: # BB#0:
234; X64-NEXT: pcmpeqd %xmm2, %xmm2
235; X64-NEXT: pxor %xmm2, %xmm0
236; X64-NEXT: pand %xmm1, %xmm0
237; X64-NEXT: retq
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
240 ret <2 x i64> %res
241}
242
243define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244; X32-LABEL: test_mm_avg_epu8:
245; X32: # BB#0:
246; X32-NEXT: pavgb %xmm1, %xmm0
247; X32-NEXT: retl
248;
249; X64-LABEL: test_mm_avg_epu8:
250; X64: # BB#0:
251; X64-NEXT: pavgb %xmm1, %xmm0
252; X64-NEXT: retq
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
257 ret <2 x i64> %bc
258}
259declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
260
261define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262; X32-LABEL: test_mm_avg_epu16:
263; X32: # BB#0:
264; X32-NEXT: pavgw %xmm1, %xmm0
265; X32-NEXT: retl
266;
267; X64-LABEL: test_mm_avg_epu16:
268; X64: # BB#0:
269; X64-NEXT: pavgw %xmm1, %xmm0
270; X64-NEXT: retq
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
275 ret <2 x i64> %bc
276}
277declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
278
279define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280; X32-LABEL: test_mm_bslli_si128:
281; X32: # BB#0:
282; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
283; X32-NEXT: retl
284;
285; X64-LABEL: test_mm_bslli_si128:
286; X64: # BB#0:
287; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
288; X64-NEXT: retq
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
292 ret <2 x i64> %bc
293}
294
295define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296; X32-LABEL: test_mm_bsrli_si128:
297; X32: # BB#0:
298; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
299; X32-NEXT: retl
300;
301; X64-LABEL: test_mm_bsrli_si128:
302; X64: # BB#0:
303; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
304; X64-NEXT: retq
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
308 ret <2 x i64> %bc
309}
310
Simon Pilgrim01809e02016-05-19 10:58:54 +0000311define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
312; X32-LABEL: test_mm_castpd_ps:
313; X32: # BB#0:
314; X32-NEXT: retl
315;
316; X64-LABEL: test_mm_castpd_ps:
317; X64: # BB#0:
318; X64-NEXT: retq
319 %res = bitcast <2 x double> %a0 to <4 x float>
320 ret <4 x float> %res
321}
322
323define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
324; X32-LABEL: test_mm_castpd_si128:
325; X32: # BB#0:
326; X32-NEXT: retl
327;
328; X64-LABEL: test_mm_castpd_si128:
329; X64: # BB#0:
330; X64-NEXT: retq
331 %res = bitcast <2 x double> %a0 to <2 x i64>
332 ret <2 x i64> %res
333}
334
335define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
336; X32-LABEL: test_mm_castps_pd:
337; X32: # BB#0:
338; X32-NEXT: retl
339;
340; X64-LABEL: test_mm_castps_pd:
341; X64: # BB#0:
342; X64-NEXT: retq
343 %res = bitcast <4 x float> %a0 to <2 x double>
344 ret <2 x double> %res
345}
346
347define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
348; X32-LABEL: test_mm_castps_si128:
349; X32: # BB#0:
350; X32-NEXT: retl
351;
352; X64-LABEL: test_mm_castps_si128:
353; X64: # BB#0:
354; X64-NEXT: retq
355 %res = bitcast <4 x float> %a0 to <2 x i64>
356 ret <2 x i64> %res
357}
358
359define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
360; X32-LABEL: test_mm_castsi128_pd:
361; X32: # BB#0:
362; X32-NEXT: retl
363;
364; X64-LABEL: test_mm_castsi128_pd:
365; X64: # BB#0:
366; X64-NEXT: retq
367 %res = bitcast <2 x i64> %a0 to <2 x double>
368 ret <2 x double> %res
369}
370
371define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
372; X32-LABEL: test_mm_castsi128_ps:
373; X32: # BB#0:
374; X32-NEXT: retl
375;
376; X64-LABEL: test_mm_castsi128_ps:
377; X64: # BB#0:
378; X64-NEXT: retq
379 %res = bitcast <2 x i64> %a0 to <4 x float>
380 ret <4 x float> %res
381}
382
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000383define void @test_mm_clflush(i8* %a0) nounwind {
384; X32-LABEL: test_mm_clflush:
385; X32: # BB#0:
386; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
387; X32-NEXT: clflush (%eax)
388; X32-NEXT: retl
389;
390; X64-LABEL: test_mm_clflush:
391; X64: # BB#0:
392; X64-NEXT: clflush (%rdi)
393; X64-NEXT: retq
394 call void @llvm.x86.sse2.clflush(i8* %a0)
395 ret void
396}
397declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
398
399define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
400; X32-LABEL: test_mm_cmpeq_epi8:
401; X32: # BB#0:
402; X32-NEXT: pcmpeqb %xmm1, %xmm0
403; X32-NEXT: retl
404;
405; X64-LABEL: test_mm_cmpeq_epi8:
406; X64: # BB#0:
407; X64-NEXT: pcmpeqb %xmm1, %xmm0
408; X64-NEXT: retq
409 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
410 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
411 %cmp = icmp eq <16 x i8> %arg0, %arg1
412 %res = sext <16 x i1> %cmp to <16 x i8>
413 %bc = bitcast <16 x i8> %res to <2 x i64>
414 ret <2 x i64> %bc
415}
416
417define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
418; X32-LABEL: test_mm_cmpeq_epi16:
419; X32: # BB#0:
420; X32-NEXT: pcmpeqw %xmm1, %xmm0
421; X32-NEXT: retl
422;
423; X64-LABEL: test_mm_cmpeq_epi16:
424; X64: # BB#0:
425; X64-NEXT: pcmpeqw %xmm1, %xmm0
426; X64-NEXT: retq
427 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
428 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
429 %cmp = icmp eq <8 x i16> %arg0, %arg1
430 %res = sext <8 x i1> %cmp to <8 x i16>
431 %bc = bitcast <8 x i16> %res to <2 x i64>
432 ret <2 x i64> %bc
433}
434
435define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
436; X32-LABEL: test_mm_cmpeq_epi32:
437; X32: # BB#0:
438; X32-NEXT: pcmpeqd %xmm1, %xmm0
439; X32-NEXT: retl
440;
441; X64-LABEL: test_mm_cmpeq_epi32:
442; X64: # BB#0:
443; X64-NEXT: pcmpeqd %xmm1, %xmm0
444; X64-NEXT: retq
445 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
446 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
447 %cmp = icmp eq <4 x i32> %arg0, %arg1
448 %res = sext <4 x i1> %cmp to <4 x i32>
449 %bc = bitcast <4 x i32> %res to <2 x i64>
450 ret <2 x i64> %bc
451}
452
453define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
454; X32-LABEL: test_mm_cmpeq_pd:
455; X32: # BB#0:
456; X32-NEXT: cmpeqpd %xmm1, %xmm0
457; X32-NEXT: retl
458;
459; X64-LABEL: test_mm_cmpeq_pd:
460; X64: # BB#0:
461; X64-NEXT: cmpeqpd %xmm1, %xmm0
462; X64-NEXT: retq
463 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
464 ret <2 x double> %res
465}
466declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
467
468define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
469; X32-LABEL: test_mm_cmpeq_sd:
470; X32: # BB#0:
471; X32-NEXT: cmpeqsd %xmm1, %xmm0
472; X32-NEXT: retl
473;
474; X64-LABEL: test_mm_cmpeq_sd:
475; X64: # BB#0:
476; X64-NEXT: cmpeqsd %xmm1, %xmm0
477; X64-NEXT: retq
478 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
479 ret <2 x double> %res
480}
481declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
482
483define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
484; X32-LABEL: test_mm_cmpge_pd:
485; X32: # BB#0:
486; X32-NEXT: cmplepd %xmm0, %xmm1
487; X32-NEXT: movapd %xmm1, %xmm0
488; X32-NEXT: retl
489;
490; X64-LABEL: test_mm_cmpge_pd:
491; X64: # BB#0:
492; X64-NEXT: cmplepd %xmm0, %xmm1
493; X64-NEXT: movapd %xmm1, %xmm0
494; X64-NEXT: retq
495 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 2)
496 ret <2 x double> %res
497}
498
499define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
500; X32-LABEL: test_mm_cmpge_sd:
501; X32: # BB#0:
502; X32-NEXT: cmplesd %xmm0, %xmm1
503; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
504; X32-NEXT: retl
505;
506; X64-LABEL: test_mm_cmpge_sd:
507; X64: # BB#0:
508; X64-NEXT: cmplesd %xmm0, %xmm1
509; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
510; X64-NEXT: retq
511 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
512 %ext0 = extractelement <2 x double> %cmp, i32 0
513 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
514 %ext1 = extractelement <2 x double> %a0, i32 1
515 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
516 ret <2 x double> %ins1
517}
518
519define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
520; X32-LABEL: test_mm_cmpgt_epi8:
521; X32: # BB#0:
522; X32-NEXT: pcmpgtb %xmm1, %xmm0
523; X32-NEXT: retl
524;
525; X64-LABEL: test_mm_cmpgt_epi8:
526; X64: # BB#0:
527; X64-NEXT: pcmpgtb %xmm1, %xmm0
528; X64-NEXT: retq
529 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
530 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
531 %cmp = icmp sgt <16 x i8> %arg0, %arg1
532 %res = sext <16 x i1> %cmp to <16 x i8>
533 %bc = bitcast <16 x i8> %res to <2 x i64>
534 ret <2 x i64> %bc
535}
536
537define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
538; X32-LABEL: test_mm_cmpgt_epi16:
539; X32: # BB#0:
540; X32-NEXT: pcmpgtw %xmm1, %xmm0
541; X32-NEXT: retl
542;
543; X64-LABEL: test_mm_cmpgt_epi16:
544; X64: # BB#0:
545; X64-NEXT: pcmpgtw %xmm1, %xmm0
546; X64-NEXT: retq
547 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
548 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
549 %cmp = icmp sgt <8 x i16> %arg0, %arg1
550 %res = sext <8 x i1> %cmp to <8 x i16>
551 %bc = bitcast <8 x i16> %res to <2 x i64>
552 ret <2 x i64> %bc
553}
554
555define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
556; X32-LABEL: test_mm_cmpgt_epi32:
557; X32: # BB#0:
558; X32-NEXT: pcmpgtd %xmm1, %xmm0
559; X32-NEXT: retl
560;
561; X64-LABEL: test_mm_cmpgt_epi32:
562; X64: # BB#0:
563; X64-NEXT: pcmpgtd %xmm1, %xmm0
564; X64-NEXT: retq
565 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
566 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
567 %cmp = icmp sgt <4 x i32> %arg0, %arg1
568 %res = sext <4 x i1> %cmp to <4 x i32>
569 %bc = bitcast <4 x i32> %res to <2 x i64>
570 ret <2 x i64> %bc
571}
572
573define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
574; X32-LABEL: test_mm_cmpgt_pd:
575; X32: # BB#0:
576; X32-NEXT: cmpltpd %xmm0, %xmm1
577; X32-NEXT: movapd %xmm1, %xmm0
578; X32-NEXT: retl
579;
580; X64-LABEL: test_mm_cmpgt_pd:
581; X64: # BB#0:
582; X64-NEXT: cmpltpd %xmm0, %xmm1
583; X64-NEXT: movapd %xmm1, %xmm0
584; X64-NEXT: retq
585 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 1)
586 ret <2 x double> %res
587}
588
589define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
590; X32-LABEL: test_mm_cmpgt_sd:
591; X32: # BB#0:
592; X32-NEXT: cmpltsd %xmm0, %xmm1
593; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
594; X32-NEXT: retl
595;
596; X64-LABEL: test_mm_cmpgt_sd:
597; X64: # BB#0:
598; X64-NEXT: cmpltsd %xmm0, %xmm1
599; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
600; X64-NEXT: retq
601 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
602 %ext0 = extractelement <2 x double> %cmp, i32 0
603 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
604 %ext1 = extractelement <2 x double> %a0, i32 1
605 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
606 ret <2 x double> %ins1
607}
608
609define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
610; X32-LABEL: test_mm_cmple_pd:
611; X32: # BB#0:
612; X32-NEXT: cmplepd %xmm1, %xmm0
613; X32-NEXT: retl
614;
615; X64-LABEL: test_mm_cmple_pd:
616; X64: # BB#0:
617; X64-NEXT: cmplepd %xmm1, %xmm0
618; X64-NEXT: retq
619 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 2)
620 ret <2 x double> %res
621}
622
623define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
624; X32-LABEL: test_mm_cmple_sd:
625; X32: # BB#0:
626; X32-NEXT: cmplesd %xmm1, %xmm0
627; X32-NEXT: retl
628;
629; X64-LABEL: test_mm_cmple_sd:
630; X64: # BB#0:
631; X64-NEXT: cmplesd %xmm1, %xmm0
632; X64-NEXT: retq
633 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
634 ret <2 x double> %res
635}
636
637define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
638; X32-LABEL: test_mm_cmplt_epi8:
639; X32: # BB#0:
640; X32-NEXT: pcmpgtb %xmm0, %xmm1
641; X32-NEXT: movdqa %xmm1, %xmm0
642; X32-NEXT: retl
643;
644; X64-LABEL: test_mm_cmplt_epi8:
645; X64: # BB#0:
646; X64-NEXT: pcmpgtb %xmm0, %xmm1
647; X64-NEXT: movdqa %xmm1, %xmm0
648; X64-NEXT: retq
649 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
650 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
651 %cmp = icmp sgt <16 x i8> %arg1, %arg0
652 %res = sext <16 x i1> %cmp to <16 x i8>
653 %bc = bitcast <16 x i8> %res to <2 x i64>
654 ret <2 x i64> %bc
655}
656
657define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
658; X32-LABEL: test_mm_cmplt_epi16:
659; X32: # BB#0:
660; X32-NEXT: pcmpgtw %xmm0, %xmm1
661; X32-NEXT: movdqa %xmm1, %xmm0
662; X32-NEXT: retl
663;
664; X64-LABEL: test_mm_cmplt_epi16:
665; X64: # BB#0:
666; X64-NEXT: pcmpgtw %xmm0, %xmm1
667; X64-NEXT: movdqa %xmm1, %xmm0
668; X64-NEXT: retq
669 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
670 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
671 %cmp = icmp sgt <8 x i16> %arg1, %arg0
672 %res = sext <8 x i1> %cmp to <8 x i16>
673 %bc = bitcast <8 x i16> %res to <2 x i64>
674 ret <2 x i64> %bc
675}
676
677define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
678; X32-LABEL: test_mm_cmplt_epi32:
679; X32: # BB#0:
680; X32-NEXT: pcmpgtd %xmm0, %xmm1
681; X32-NEXT: movdqa %xmm1, %xmm0
682; X32-NEXT: retl
683;
684; X64-LABEL: test_mm_cmplt_epi32:
685; X64: # BB#0:
686; X64-NEXT: pcmpgtd %xmm0, %xmm1
687; X64-NEXT: movdqa %xmm1, %xmm0
688; X64-NEXT: retq
689 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
690 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
691 %cmp = icmp sgt <4 x i32> %arg1, %arg0
692 %res = sext <4 x i1> %cmp to <4 x i32>
693 %bc = bitcast <4 x i32> %res to <2 x i64>
694 ret <2 x i64> %bc
695}
696
697define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
698; X32-LABEL: test_mm_cmplt_pd:
699; X32: # BB#0:
700; X32-NEXT: cmpltpd %xmm1, %xmm0
701; X32-NEXT: retl
702;
703; X64-LABEL: test_mm_cmplt_pd:
704; X64: # BB#0:
705; X64-NEXT: cmpltpd %xmm1, %xmm0
706; X64-NEXT: retq
707 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 1)
708 ret <2 x double> %res
709}
710
711define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
712; X32-LABEL: test_mm_cmplt_sd:
713; X32: # BB#0:
714; X32-NEXT: cmpltsd %xmm1, %xmm0
715; X32-NEXT: retl
716;
717; X64-LABEL: test_mm_cmplt_sd:
718; X64: # BB#0:
719; X64-NEXT: cmpltsd %xmm1, %xmm0
720; X64-NEXT: retq
721 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
722 ret <2 x double> %res
723}
724
725define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
726; X32-LABEL: test_mm_cmpneq_pd:
727; X32: # BB#0:
728; X32-NEXT: cmpneqpd %xmm1, %xmm0
729; X32-NEXT: retl
730;
731; X64-LABEL: test_mm_cmpneq_pd:
732; X64: # BB#0:
733; X64-NEXT: cmpneqpd %xmm1, %xmm0
734; X64-NEXT: retq
735 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 4)
736 ret <2 x double> %res
737}
738
739define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
740; X32-LABEL: test_mm_cmpneq_sd:
741; X32: # BB#0:
742; X32-NEXT: cmpneqsd %xmm1, %xmm0
743; X32-NEXT: retl
744;
745; X64-LABEL: test_mm_cmpneq_sd:
746; X64: # BB#0:
747; X64-NEXT: cmpneqsd %xmm1, %xmm0
748; X64-NEXT: retq
749 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
750 ret <2 x double> %res
751}
752
753define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
754; X32-LABEL: test_mm_cmpnge_pd:
755; X32: # BB#0:
756; X32-NEXT: cmpnlepd %xmm0, %xmm1
757; X32-NEXT: movapd %xmm1, %xmm0
758; X32-NEXT: retl
759;
760; X64-LABEL: test_mm_cmpnge_pd:
761; X64: # BB#0:
762; X64-NEXT: cmpnlepd %xmm0, %xmm1
763; X64-NEXT: movapd %xmm1, %xmm0
764; X64-NEXT: retq
765 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 6)
766 ret <2 x double> %res
767}
768
769define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
770; X32-LABEL: test_mm_cmpnge_sd:
771; X32: # BB#0:
772; X32-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000773; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000774; X32-NEXT: retl
775;
776; X64-LABEL: test_mm_cmpnge_sd:
777; X64: # BB#0:
778; X64-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000779; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000780; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000781 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
782 %ext0 = extractelement <2 x double> %cmp, i32 0
783 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
784 %ext1 = extractelement <2 x double> %a0, i32 1
785 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
786 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000787}
788
789define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
790; X32-LABEL: test_mm_cmpngt_pd:
791; X32: # BB#0:
792; X32-NEXT: cmpnltpd %xmm0, %xmm1
793; X32-NEXT: movapd %xmm1, %xmm0
794; X32-NEXT: retl
795;
796; X64-LABEL: test_mm_cmpngt_pd:
797; X64: # BB#0:
798; X64-NEXT: cmpnltpd %xmm0, %xmm1
799; X64-NEXT: movapd %xmm1, %xmm0
800; X64-NEXT: retq
801 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 5)
802 ret <2 x double> %res
803}
804
805define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
806; X32-LABEL: test_mm_cmpngt_sd:
807; X32: # BB#0:
808; X32-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000809; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000810; X32-NEXT: retl
811;
812; X64-LABEL: test_mm_cmpngt_sd:
813; X64: # BB#0:
814; X64-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000815; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000816; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000817 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
818 %ext0 = extractelement <2 x double> %cmp, i32 0
819 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
820 %ext1 = extractelement <2 x double> %a0, i32 1
821 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
822 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000823}
824
825define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
826; X32-LABEL: test_mm_cmpnle_pd:
827; X32: # BB#0:
828; X32-NEXT: cmpnlepd %xmm1, %xmm0
829; X32-NEXT: retl
830;
831; X64-LABEL: test_mm_cmpnle_pd:
832; X64: # BB#0:
833; X64-NEXT: cmpnlepd %xmm1, %xmm0
834; X64-NEXT: retq
835 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 6)
836 ret <2 x double> %res
837}
838
839define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
840; X32-LABEL: test_mm_cmpnle_sd:
841; X32: # BB#0:
842; X32-NEXT: cmpnlesd %xmm1, %xmm0
843; X32-NEXT: retl
844;
845; X64-LABEL: test_mm_cmpnle_sd:
846; X64: # BB#0:
847; X64-NEXT: cmpnlesd %xmm1, %xmm0
848; X64-NEXT: retq
849 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
850 ret <2 x double> %res
851}
852
853define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
854; X32-LABEL: test_mm_cmpnlt_pd:
855; X32: # BB#0:
856; X32-NEXT: cmpnltpd %xmm1, %xmm0
857; X32-NEXT: retl
858;
859; X64-LABEL: test_mm_cmpnlt_pd:
860; X64: # BB#0:
861; X64-NEXT: cmpnltpd %xmm1, %xmm0
862; X64-NEXT: retq
863 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 5)
864 ret <2 x double> %res
865}
866
867define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
868; X32-LABEL: test_mm_cmpnlt_sd:
869; X32: # BB#0:
870; X32-NEXT: cmpnltsd %xmm1, %xmm0
871; X32-NEXT: retl
872;
873; X64-LABEL: test_mm_cmpnlt_sd:
874; X64: # BB#0:
875; X64-NEXT: cmpnltsd %xmm1, %xmm0
876; X64-NEXT: retq
877 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
878 ret <2 x double> %res
879}
880
881define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
882; X32-LABEL: test_mm_cmpord_pd:
883; X32: # BB#0:
884; X32-NEXT: cmpordpd %xmm1, %xmm0
885; X32-NEXT: retl
886;
887; X64-LABEL: test_mm_cmpord_pd:
888; X64: # BB#0:
889; X64-NEXT: cmpordpd %xmm1, %xmm0
890; X64-NEXT: retq
891 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
892 ret <2 x double> %res
893}
894
895define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
896; X32-LABEL: test_mm_cmpord_sd:
897; X32: # BB#0:
898; X32-NEXT: cmpordsd %xmm1, %xmm0
899; X32-NEXT: retl
900;
901; X64-LABEL: test_mm_cmpord_sd:
902; X64: # BB#0:
903; X64-NEXT: cmpordsd %xmm1, %xmm0
904; X64-NEXT: retq
905 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
906 ret <2 x double> %res
907}
908
909define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
910; X32-LABEL: test_mm_cmpunord_pd:
911; X32: # BB#0:
912; X32-NEXT: cmpunordpd %xmm1, %xmm0
913; X32-NEXT: retl
914;
915; X64-LABEL: test_mm_cmpunord_pd:
916; X64: # BB#0:
917; X64-NEXT: cmpunordpd %xmm1, %xmm0
918; X64-NEXT: retq
919 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 3)
920 ret <2 x double> %res
921}
922
923define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
924; X32-LABEL: test_mm_cmpunord_sd:
925; X32: # BB#0:
926; X32-NEXT: cmpunordsd %xmm1, %xmm0
927; X32-NEXT: retl
928;
929; X64-LABEL: test_mm_cmpunord_sd:
930; X64: # BB#0:
931; X64-NEXT: cmpunordsd %xmm1, %xmm0
932; X64-NEXT: retq
933 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
934 ret <2 x double> %res
935}
936
937define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
938; X32-LABEL: test_mm_comieq_sd:
939; X32: # BB#0:
940; X32-NEXT: comisd %xmm1, %xmm0
941; X32-NEXT: setnp %al
942; X32-NEXT: sete %cl
943; X32-NEXT: andb %al, %cl
944; X32-NEXT: movzbl %cl, %eax
945; X32-NEXT: retl
946;
947; X64-LABEL: test_mm_comieq_sd:
948; X64: # BB#0:
949; X64-NEXT: comisd %xmm1, %xmm0
950; X64-NEXT: setnp %al
951; X64-NEXT: sete %cl
952; X64-NEXT: andb %al, %cl
953; X64-NEXT: movzbl %cl, %eax
954; X64-NEXT: retq
955 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
956 ret i32 %res
957}
958declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
959
960define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961; X32-LABEL: test_mm_comige_sd:
962; X32: # BB#0:
963; X32-NEXT: comisd %xmm1, %xmm0
964; X32-NEXT: setae %al
965; X32-NEXT: movzbl %al, %eax
966; X32-NEXT: retl
967;
968; X64-LABEL: test_mm_comige_sd:
969; X64: # BB#0:
970; X64-NEXT: comisd %xmm1, %xmm0
971; X64-NEXT: setae %al
972; X64-NEXT: movzbl %al, %eax
973; X64-NEXT: retq
974 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
975 ret i32 %res
976}
977declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
978
979define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
980; X32-LABEL: test_mm_comigt_sd:
981; X32: # BB#0:
982; X32-NEXT: comisd %xmm1, %xmm0
983; X32-NEXT: seta %al
984; X32-NEXT: movzbl %al, %eax
985; X32-NEXT: retl
986;
987; X64-LABEL: test_mm_comigt_sd:
988; X64: # BB#0:
989; X64-NEXT: comisd %xmm1, %xmm0
990; X64-NEXT: seta %al
991; X64-NEXT: movzbl %al, %eax
992; X64-NEXT: retq
993 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
994 ret i32 %res
995}
996declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
997
998define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
999; X32-LABEL: test_mm_comile_sd:
1000; X32: # BB#0:
1001; X32-NEXT: comisd %xmm0, %xmm1
1002; X32-NEXT: setae %al
1003; X32-NEXT: movzbl %al, %eax
1004; X32-NEXT: retl
1005;
1006; X64-LABEL: test_mm_comile_sd:
1007; X64: # BB#0:
1008; X64-NEXT: comisd %xmm0, %xmm1
1009; X64-NEXT: setae %al
1010; X64-NEXT: movzbl %al, %eax
1011; X64-NEXT: retq
1012 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1013 ret i32 %res
1014}
1015declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1016
1017define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1018; X32-LABEL: test_mm_comilt_sd:
1019; X32: # BB#0:
1020; X32-NEXT: comisd %xmm0, %xmm1
1021; X32-NEXT: seta %al
1022; X32-NEXT: movzbl %al, %eax
1023; X32-NEXT: retl
1024;
1025; X64-LABEL: test_mm_comilt_sd:
1026; X64: # BB#0:
1027; X64-NEXT: comisd %xmm0, %xmm1
1028; X64-NEXT: seta %al
1029; X64-NEXT: movzbl %al, %eax
1030; X64-NEXT: retq
1031 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1032 ret i32 %res
1033}
1034declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1035
1036define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1037; X32-LABEL: test_mm_comineq_sd:
1038; X32: # BB#0:
1039; X32-NEXT: comisd %xmm1, %xmm0
1040; X32-NEXT: setp %al
1041; X32-NEXT: setne %cl
1042; X32-NEXT: orb %al, %cl
1043; X32-NEXT: movzbl %cl, %eax
1044; X32-NEXT: retl
1045;
1046; X64-LABEL: test_mm_comineq_sd:
1047; X64: # BB#0:
1048; X64-NEXT: comisd %xmm1, %xmm0
1049; X64-NEXT: setp %al
1050; X64-NEXT: setne %cl
1051; X64-NEXT: orb %al, %cl
1052; X64-NEXT: movzbl %cl, %eax
1053; X64-NEXT: retq
1054 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1055 ret i32 %res
1056}
1057declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1058
1059define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1060; X32-LABEL: test_mm_cvtepi32_pd:
1061; X32: # BB#0:
1062; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1063; X32-NEXT: retl
1064;
1065; X64-LABEL: test_mm_cvtepi32_pd:
1066; X64: # BB#0:
1067; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1068; X64-NEXT: retq
1069 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001070 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1071 %res = sitofp <2 x i32> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001072 ret <2 x double> %res
1073}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001074
1075define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1076; X32-LABEL: test_mm_cvtepi32_ps:
1077; X32: # BB#0:
1078; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1079; X32-NEXT: retl
1080;
1081; X64-LABEL: test_mm_cvtepi32_ps:
1082; X64: # BB#0:
1083; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1084; X64-NEXT: retq
1085 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1086 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1087 ret <4 x float> %res
1088}
1089declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1090
1091define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1092; X32-LABEL: test_mm_cvtpd_epi32:
1093; X32: # BB#0:
1094; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1095; X32-NEXT: retl
1096;
1097; X64-LABEL: test_mm_cvtpd_epi32:
1098; X64: # BB#0:
1099; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1100; X64-NEXT: retq
1101 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1102 %bc = bitcast <4 x i32> %res to <2 x i64>
1103 ret <2 x i64> %bc
1104}
1105declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1106
1107define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1108; X32-LABEL: test_mm_cvtpd_ps:
1109; X32: # BB#0:
1110; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1111; X32-NEXT: retl
1112;
1113; X64-LABEL: test_mm_cvtpd_ps:
1114; X64: # BB#0:
1115; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1116; X64-NEXT: retq
1117 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1118 ret <4 x float> %res
1119}
1120declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1121
1122define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1123; X32-LABEL: test_mm_cvtps_epi32:
1124; X32: # BB#0:
1125; X32-NEXT: cvtps2dq %xmm0, %xmm0
1126; X32-NEXT: retl
1127;
1128; X64-LABEL: test_mm_cvtps_epi32:
1129; X64: # BB#0:
1130; X64-NEXT: cvtps2dq %xmm0, %xmm0
1131; X64-NEXT: retq
1132 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1133 %bc = bitcast <4 x i32> %res to <2 x i64>
1134 ret <2 x i64> %bc
1135}
1136declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1137
1138define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1139; X32-LABEL: test_mm_cvtps_pd:
1140; X32: # BB#0:
1141; X32-NEXT: cvtps2pd %xmm0, %xmm0
1142; X32-NEXT: retl
1143;
1144; X64-LABEL: test_mm_cvtps_pd:
1145; X64: # BB#0:
1146; X64-NEXT: cvtps2pd %xmm0, %xmm0
1147; X64-NEXT: retq
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001148 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1149 %res = fpext <2 x float> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001150 ret <2 x double> %res
1151}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001152
1153define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1154; X32-LABEL: test_mm_cvtsd_f64:
1155; X32: # BB#0:
1156; X32-NEXT: pushl %ebp
1157; X32-NEXT: movl %esp, %ebp
1158; X32-NEXT: andl $-8, %esp
1159; X32-NEXT: subl $8, %esp
1160; X32-NEXT: movlps %xmm0, (%esp)
1161; X32-NEXT: fldl (%esp)
1162; X32-NEXT: movl %ebp, %esp
1163; X32-NEXT: popl %ebp
1164; X32-NEXT: retl
1165;
1166; X64-LABEL: test_mm_cvtsd_f64:
1167; X64: # BB#0:
1168; X64-NEXT: retq
1169 %res = extractelement <2 x double> %a0, i32 0
1170 ret double %res
1171}
1172
1173define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1174; X32-LABEL: test_mm_cvtsd_si32:
1175; X32: # BB#0:
1176; X32-NEXT: cvtsd2si %xmm0, %eax
1177; X32-NEXT: retl
1178;
1179; X64-LABEL: test_mm_cvtsd_si32:
1180; X64: # BB#0:
1181; X64-NEXT: cvtsd2si %xmm0, %eax
1182; X64-NEXT: retq
1183 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1184 ret i32 %res
1185}
1186declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1187
1188define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1189; X32-LABEL: test_mm_cvtsi128_si32:
1190; X32: # BB#0:
1191; X32-NEXT: movd %xmm0, %eax
1192; X32-NEXT: retl
1193;
1194; X64-LABEL: test_mm_cvtsi128_si32:
1195; X64: # BB#0:
1196; X64-NEXT: movd %xmm0, %eax
1197; X64-NEXT: retq
1198 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1199 %res = extractelement <4 x i32> %arg0, i32 0
1200 ret i32 %res
1201}
1202
1203define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1204; X32-LABEL: test_mm_cvtsi32_sd:
1205; X32: # BB#0:
1206; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1207; X32-NEXT: cvtsi2sdl %eax, %xmm1
1208; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1209; X32-NEXT: retl
1210;
1211; X64-LABEL: test_mm_cvtsi32_sd:
1212; X64: # BB#0:
1213; X64-NEXT: cvtsi2sdl %edi, %xmm1
1214; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1215; X64-NEXT: retq
1216 %cvt = sitofp i32 %a1 to double
1217 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1218 ret <2 x double> %res
1219}
1220
1221define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1222; X32-LABEL: test_mm_cvtsi32_si128:
1223; X32: # BB#0:
1224; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1225; X32-NEXT: retl
1226;
1227; X64-LABEL: test_mm_cvtsi32_si128:
1228; X64: # BB#0:
1229; X64-NEXT: movd %edi, %xmm0
1230; X64-NEXT: retq
1231 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1232 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1233 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1234 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1235 %res = bitcast <4 x i32> %res3 to <2 x i64>
1236 ret <2 x i64> %res
1237}
1238
1239define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1240; X32-LABEL: test_mm_cvtss_sd:
1241; X32: # BB#0:
1242; X32-NEXT: cvtss2sd %xmm1, %xmm1
1243; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1244; X32-NEXT: retl
1245;
1246; X64-LABEL: test_mm_cvtss_sd:
1247; X64: # BB#0:
1248; X64-NEXT: cvtss2sd %xmm1, %xmm1
1249; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1250; X64-NEXT: retq
1251 %ext = extractelement <4 x float> %a1, i32 0
1252 %cvt = fpext float %ext to double
1253 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1254 ret <2 x double> %res
1255}
1256
1257define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1258; X32-LABEL: test_mm_cvttpd_epi32:
1259; X32: # BB#0:
1260; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1261; X32-NEXT: retl
1262;
1263; X64-LABEL: test_mm_cvttpd_epi32:
1264; X64: # BB#0:
1265; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1266; X64-NEXT: retq
1267 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1268 %bc = bitcast <4 x i32> %res to <2 x i64>
1269 ret <2 x i64> %bc
1270}
1271declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1272
1273define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1274; X32-LABEL: test_mm_cvttps_epi32:
1275; X32: # BB#0:
1276; X32-NEXT: cvttps2dq %xmm0, %xmm0
1277; X32-NEXT: retl
1278;
1279; X64-LABEL: test_mm_cvttps_epi32:
1280; X64: # BB#0:
1281; X64-NEXT: cvttps2dq %xmm0, %xmm0
1282; X64-NEXT: retq
Simon Pilgrim0afd5a42016-06-02 10:55:21 +00001283 %res = fptosi <4 x float> %a0 to <4 x i32>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001284 %bc = bitcast <4 x i32> %res to <2 x i64>
1285 ret <2 x i64> %bc
1286}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001287
1288define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1289; X32-LABEL: test_mm_cvttsd_si32:
1290; X32: # BB#0:
1291; X32-NEXT: cvttsd2si %xmm0, %eax
1292; X32-NEXT: retl
1293;
1294; X64-LABEL: test_mm_cvttsd_si32:
1295; X64: # BB#0:
1296; X64-NEXT: cvttsd2si %xmm0, %eax
1297; X64-NEXT: retq
1298 %ext = extractelement <2 x double> %a0, i32 0
1299 %res = fptosi double %ext to i32
1300 ret i32 %res
1301}
1302
1303define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1304; X32-LABEL: test_mm_div_pd:
1305; X32: # BB#0:
1306; X32-NEXT: divpd %xmm1, %xmm0
1307; X32-NEXT: retl
1308;
1309; X64-LABEL: test_mm_div_pd:
1310; X64: # BB#0:
1311; X64-NEXT: divpd %xmm1, %xmm0
1312; X64-NEXT: retq
1313 %res = fdiv <2 x double> %a0, %a1
1314 ret <2 x double> %res
1315}
1316
1317define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1318; X32-LABEL: test_mm_div_sd:
1319; X32: # BB#0:
1320; X32-NEXT: divsd %xmm1, %xmm0
1321; X32-NEXT: retl
1322;
1323; X64-LABEL: test_mm_div_sd:
1324; X64: # BB#0:
1325; X64-NEXT: divsd %xmm1, %xmm0
1326; X64-NEXT: retq
1327 %ext0 = extractelement <2 x double> %a0, i32 0
1328 %ext1 = extractelement <2 x double> %a1, i32 0
1329 %fdiv = fdiv double %ext0, %ext1
1330 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1331 ret <2 x double> %res
1332}
1333
1334define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1335; X32-LABEL: test_mm_extract_epi16:
1336; X32: # BB#0:
1337; X32-NEXT: pextrw $1, %xmm0, %eax
1338; X32-NEXT: movzwl %ax, %eax
1339; X32-NEXT: retl
1340;
1341; X64-LABEL: test_mm_extract_epi16:
1342; X64: # BB#0:
1343; X64-NEXT: pextrw $1, %xmm0, %eax
1344; X64-NEXT: movzwl %ax, %eax
1345; X64-NEXT: retq
1346 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1347 %ext = extractelement <8 x i16> %arg0, i32 1
1348 %res = zext i16 %ext to i32
1349 ret i32 %res
1350}
1351
1352define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1353; X32-LABEL: test_mm_insert_epi16:
1354; X32: # BB#0:
1355; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1356; X32-NEXT: pinsrw $1, %eax, %xmm0
1357; X32-NEXT: retl
1358;
1359; X64-LABEL: test_mm_insert_epi16:
1360; X64: # BB#0:
1361; X64-NEXT: pinsrw $1, %edi, %xmm0
1362; X64-NEXT: retq
1363 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1364 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1365 %bc = bitcast <8 x i16> %res to <2 x i64>
1366 ret <2 x i64> %bc
1367}
1368
1369define void @test_mm_lfence() nounwind {
1370; X32-LABEL: test_mm_lfence:
1371; X32: # BB#0:
1372; X32-NEXT: lfence
1373; X32-NEXT: retl
1374;
1375; X64-LABEL: test_mm_lfence:
1376; X64: # BB#0:
1377; X64-NEXT: lfence
1378; X64-NEXT: retq
1379 call void @llvm.x86.sse2.lfence()
1380 ret void
1381}
1382declare void @llvm.x86.sse2.lfence() nounwind readnone
1383
1384define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1385; X32-LABEL: test_mm_load_pd:
1386; X32: # BB#0:
1387; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1388; X32-NEXT: movaps (%eax), %xmm0
1389; X32-NEXT: retl
1390;
1391; X64-LABEL: test_mm_load_pd:
1392; X64: # BB#0:
1393; X64-NEXT: movaps (%rdi), %xmm0
1394; X64-NEXT: retq
1395 %arg0 = bitcast double* %a0 to <2 x double>*
1396 %res = load <2 x double>, <2 x double>* %arg0, align 16
1397 ret <2 x double> %res
1398}
1399
1400define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1401; X32-LABEL: test_mm_load_sd:
1402; X32: # BB#0:
1403; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1404; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1405; X32-NEXT: retl
1406;
1407; X64-LABEL: test_mm_load_sd:
1408; X64: # BB#0:
1409; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1410; X64-NEXT: retq
1411 %ld = load double, double* %a0, align 1
1412 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1413 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1414 ret <2 x double> %res1
1415}
1416
1417define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1418; X32-LABEL: test_mm_load_si128:
1419; X32: # BB#0:
1420; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1421; X32-NEXT: movaps (%eax), %xmm0
1422; X32-NEXT: retl
1423;
1424; X64-LABEL: test_mm_load_si128:
1425; X64: # BB#0:
1426; X64-NEXT: movaps (%rdi), %xmm0
1427; X64-NEXT: retq
1428 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1429 ret <2 x i64> %res
1430}
1431
1432define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1433; X32-LABEL: test_mm_load1_pd:
1434; X32: # BB#0:
1435; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1436; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1437; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1438; X32-NEXT: retl
1439;
1440; X64-LABEL: test_mm_load1_pd:
1441; X64: # BB#0:
1442; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1443; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1444; X64-NEXT: retq
1445 %ld = load double, double* %a0, align 8
1446 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1447 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1448 ret <2 x double> %res1
1449}
1450
1451define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1452; X32-LABEL: test_mm_loadh_pd:
1453; X32: # BB#0:
1454; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1455; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1456; X32-NEXT: retl
1457;
1458; X64-LABEL: test_mm_loadh_pd:
1459; X64: # BB#0:
1460; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1461; X64-NEXT: retq
1462 %ld = load double, double* %a1, align 8
1463 %res = insertelement <2 x double> %a0, double %ld, i32 1
1464 ret <2 x double> %res
1465}
1466
1467define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1468; X32-LABEL: test_mm_loadl_epi64:
1469; X32: # BB#0:
1470; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1471; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1472; X32-NEXT: retl
1473;
1474; X64-LABEL: test_mm_loadl_epi64:
1475; X64: # BB#0:
1476; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1477; X64-NEXT: retq
1478 %bc = bitcast <2 x i64>* %a1 to i64*
1479 %ld = load i64, i64* %bc, align 1
1480 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1481 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1482 ret <2 x i64> %res1
1483}
1484
1485define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1486; X32-LABEL: test_mm_loadl_pd:
1487; X32: # BB#0:
1488; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1489; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1490; X32-NEXT: retl
1491;
1492; X64-LABEL: test_mm_loadl_pd:
1493; X64: # BB#0:
1494; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1495; X64-NEXT: retq
1496 %ld = load double, double* %a1, align 8
1497 %res = insertelement <2 x double> %a0, double %ld, i32 0
1498 ret <2 x double> %res
1499}
1500
1501define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1502; X32-LABEL: test_mm_loadr_pd:
1503; X32: # BB#0:
1504; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1505; X32-NEXT: movapd (%eax), %xmm0
1506; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1507; X32-NEXT: retl
1508;
1509; X64-LABEL: test_mm_loadr_pd:
1510; X64: # BB#0:
1511; X64-NEXT: movapd (%rdi), %xmm0
1512; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1513; X64-NEXT: retq
1514 %arg0 = bitcast double* %a0 to <2 x double>*
1515 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1516 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1517 ret <2 x double> %res
1518}
1519
1520define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1521; X32-LABEL: test_mm_loadu_pd:
1522; X32: # BB#0:
1523; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1524; X32-NEXT: movups (%eax), %xmm0
1525; X32-NEXT: retl
1526;
1527; X64-LABEL: test_mm_loadu_pd:
1528; X64: # BB#0:
1529; X64-NEXT: movups (%rdi), %xmm0
1530; X64-NEXT: retq
1531 %arg0 = bitcast double* %a0 to <2 x double>*
1532 %res = load <2 x double>, <2 x double>* %arg0, align 1
1533 ret <2 x double> %res
1534}
1535
1536define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1537; X32-LABEL: test_mm_loadu_si128:
1538; X32: # BB#0:
1539; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1540; X32-NEXT: movups (%eax), %xmm0
1541; X32-NEXT: retl
1542;
1543; X64-LABEL: test_mm_loadu_si128:
1544; X64: # BB#0:
1545; X64-NEXT: movups (%rdi), %xmm0
1546; X64-NEXT: retq
1547 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1548 ret <2 x i64> %res
1549}
1550
1551define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1552; X32-LABEL: test_mm_madd_epi16:
1553; X32: # BB#0:
1554; X32-NEXT: pmaddwd %xmm1, %xmm0
1555; X32-NEXT: retl
1556;
1557; X64-LABEL: test_mm_madd_epi16:
1558; X64: # BB#0:
1559; X64-NEXT: pmaddwd %xmm1, %xmm0
1560; X64-NEXT: retq
1561 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1562 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1563 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1564 %bc = bitcast <4 x i32> %res to <2 x i64>
1565 ret <2 x i64> %bc
1566}
1567declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1568
1569define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1570; X32-LABEL: test_mm_maskmoveu_si128:
1571; X32: # BB#0:
1572; X32-NEXT: pushl %edi
1573; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1574; X32-NEXT: maskmovdqu %xmm1, %xmm0
1575; X32-NEXT: popl %edi
1576; X32-NEXT: retl
1577;
1578; X64-LABEL: test_mm_maskmoveu_si128:
1579; X64: # BB#0:
1580; X64-NEXT: maskmovdqu %xmm1, %xmm0
1581; X64-NEXT: retq
1582 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1583 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1584 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1585 ret void
1586}
1587declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1588
1589define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1590; X32-LABEL: test_mm_max_epi16:
1591; X32: # BB#0:
1592; X32-NEXT: pmaxsw %xmm1, %xmm0
1593; X32-NEXT: retl
1594;
1595; X64-LABEL: test_mm_max_epi16:
1596; X64: # BB#0:
1597; X64-NEXT: pmaxsw %xmm1, %xmm0
1598; X64-NEXT: retq
1599 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1600 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001601 %cmp = icmp sgt <8 x i16> %arg0, %arg1
1602 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1603 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001604 ret <2 x i64> %bc
1605}
1606declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1607
1608define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1609; X32-LABEL: test_mm_max_epu8:
1610; X32: # BB#0:
1611; X32-NEXT: pmaxub %xmm1, %xmm0
1612; X32-NEXT: retl
1613;
1614; X64-LABEL: test_mm_max_epu8:
1615; X64: # BB#0:
1616; X64-NEXT: pmaxub %xmm1, %xmm0
1617; X64-NEXT: retq
1618 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1619 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001620 %cmp = icmp ugt <16 x i8> %arg0, %arg1
1621 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1622 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001623 ret <2 x i64> %bc
1624}
1625declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1626
1627define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1628; X32-LABEL: test_mm_max_pd:
1629; X32: # BB#0:
1630; X32-NEXT: maxpd %xmm1, %xmm0
1631; X32-NEXT: retl
1632;
1633; X64-LABEL: test_mm_max_pd:
1634; X64: # BB#0:
1635; X64-NEXT: maxpd %xmm1, %xmm0
1636; X64-NEXT: retq
1637 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1638 ret <2 x double> %res
1639}
1640declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1641
1642define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1643; X32-LABEL: test_mm_max_sd:
1644; X32: # BB#0:
1645; X32-NEXT: maxsd %xmm1, %xmm0
1646; X32-NEXT: retl
1647;
1648; X64-LABEL: test_mm_max_sd:
1649; X64: # BB#0:
1650; X64-NEXT: maxsd %xmm1, %xmm0
1651; X64-NEXT: retq
1652 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1653 ret <2 x double> %res
1654}
1655declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1656
1657define void @test_mm_mfence() nounwind {
1658; X32-LABEL: test_mm_mfence:
1659; X32: # BB#0:
1660; X32-NEXT: mfence
1661; X32-NEXT: retl
1662;
1663; X64-LABEL: test_mm_mfence:
1664; X64: # BB#0:
1665; X64-NEXT: mfence
1666; X64-NEXT: retq
1667 call void @llvm.x86.sse2.mfence()
1668 ret void
1669}
1670declare void @llvm.x86.sse2.mfence() nounwind readnone
1671
1672define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1673; X32-LABEL: test_mm_min_epi16:
1674; X32: # BB#0:
1675; X32-NEXT: pminsw %xmm1, %xmm0
1676; X32-NEXT: retl
1677;
1678; X64-LABEL: test_mm_min_epi16:
1679; X64: # BB#0:
1680; X64-NEXT: pminsw %xmm1, %xmm0
1681; X64-NEXT: retq
1682 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1683 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001684 %cmp = icmp slt <8 x i16> %arg0, %arg1
1685 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1686 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001687 ret <2 x i64> %bc
1688}
1689declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1690
1691define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1692; X32-LABEL: test_mm_min_epu8:
1693; X32: # BB#0:
1694; X32-NEXT: pminub %xmm1, %xmm0
1695; X32-NEXT: retl
1696;
1697; X64-LABEL: test_mm_min_epu8:
1698; X64: # BB#0:
1699; X64-NEXT: pminub %xmm1, %xmm0
1700; X64-NEXT: retq
1701 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1702 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001703 %cmp = icmp ult <16 x i8> %arg0, %arg1
1704 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1705 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001706 ret <2 x i64> %bc
1707}
1708declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1709
1710define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1711; X32-LABEL: test_mm_min_pd:
1712; X32: # BB#0:
1713; X32-NEXT: minpd %xmm1, %xmm0
1714; X32-NEXT: retl
1715;
1716; X64-LABEL: test_mm_min_pd:
1717; X64: # BB#0:
1718; X64-NEXT: minpd %xmm1, %xmm0
1719; X64-NEXT: retq
1720 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1721 ret <2 x double> %res
1722}
1723declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1724
1725define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1726; X32-LABEL: test_mm_min_sd:
1727; X32: # BB#0:
1728; X32-NEXT: minsd %xmm1, %xmm0
1729; X32-NEXT: retl
1730;
1731; X64-LABEL: test_mm_min_sd:
1732; X64: # BB#0:
1733; X64-NEXT: minsd %xmm1, %xmm0
1734; X64-NEXT: retq
1735 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1736 ret <2 x double> %res
1737}
1738declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1739
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001740define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1741; X32-LABEL: test_mm_move_epi64:
1742; X32: # BB#0:
1743; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1744; X32-NEXT: retl
1745;
1746; X64-LABEL: test_mm_move_epi64:
1747; X64: # BB#0:
1748; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1749; X64-NEXT: retq
1750 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1751 ret <2 x i64> %res
1752}
1753
1754define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1755; X32-LABEL: test_mm_move_sd:
1756; X32: # BB#0:
1757; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1758; X32-NEXT: retl
1759;
1760; X64-LABEL: test_mm_move_sd:
1761; X64: # BB#0:
1762; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1763; X64-NEXT: retq
1764 %ext0 = extractelement <2 x double> %a1, i32 0
1765 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1766 %ext1 = extractelement <2 x double> %a0, i32 1
1767 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1768 ret <2 x double> %res1
1769}
1770
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001771define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1772; X32-LABEL: test_mm_movemask_epi8:
1773; X32: # BB#0:
1774; X32-NEXT: pmovmskb %xmm0, %eax
1775; X32-NEXT: retl
1776;
1777; X64-LABEL: test_mm_movemask_epi8:
1778; X64: # BB#0:
1779; X64-NEXT: pmovmskb %xmm0, %eax
1780; X64-NEXT: retq
1781 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1782 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1783 ret i32 %res
1784}
1785declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1786
1787define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1788; X32-LABEL: test_mm_movemask_pd:
1789; X32: # BB#0:
1790; X32-NEXT: movmskpd %xmm0, %eax
1791; X32-NEXT: retl
1792;
1793; X64-LABEL: test_mm_movemask_pd:
1794; X64: # BB#0:
1795; X64-NEXT: movmskpd %xmm0, %eax
1796; X64-NEXT: retq
1797 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1798 ret i32 %res
1799}
1800declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1801
1802define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1803; X32-LABEL: test_mm_mul_epu32:
1804; X32: # BB#0:
1805; X32-NEXT: pmuludq %xmm1, %xmm0
1806; X32-NEXT: retl
1807;
1808; X64-LABEL: test_mm_mul_epu32:
1809; X64: # BB#0:
1810; X64-NEXT: pmuludq %xmm1, %xmm0
1811; X64-NEXT: retq
1812 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1813 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1814 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1815 ret <2 x i64> %res
1816}
1817declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1818
1819define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1820; X32-LABEL: test_mm_mul_pd:
1821; X32: # BB#0:
1822; X32-NEXT: mulpd %xmm1, %xmm0
1823; X32-NEXT: retl
1824;
1825; X64-LABEL: test_mm_mul_pd:
1826; X64: # BB#0:
1827; X64-NEXT: mulpd %xmm1, %xmm0
1828; X64-NEXT: retq
1829 %res = fmul <2 x double> %a0, %a1
1830 ret <2 x double> %res
1831}
1832
1833define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1834; X32-LABEL: test_mm_mul_sd:
1835; X32: # BB#0:
1836; X32-NEXT: mulsd %xmm1, %xmm0
1837; X32-NEXT: retl
1838;
1839; X64-LABEL: test_mm_mul_sd:
1840; X64: # BB#0:
1841; X64-NEXT: mulsd %xmm1, %xmm0
1842; X64-NEXT: retq
1843 %ext0 = extractelement <2 x double> %a0, i32 0
1844 %ext1 = extractelement <2 x double> %a1, i32 0
1845 %fmul = fmul double %ext0, %ext1
1846 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1847 ret <2 x double> %res
1848}
1849
1850define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1851; X32-LABEL: test_mm_mulhi_epi16:
1852; X32: # BB#0:
1853; X32-NEXT: pmulhw %xmm1, %xmm0
1854; X32-NEXT: retl
1855;
1856; X64-LABEL: test_mm_mulhi_epi16:
1857; X64: # BB#0:
1858; X64-NEXT: pmulhw %xmm1, %xmm0
1859; X64-NEXT: retq
1860 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1861 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1862 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1863 %bc = bitcast <8 x i16> %res to <2 x i64>
1864 ret <2 x i64> %bc
1865}
1866declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1867
1868define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1869; X32-LABEL: test_mm_mulhi_epu16:
1870; X32: # BB#0:
1871; X32-NEXT: pmulhuw %xmm1, %xmm0
1872; X32-NEXT: retl
1873;
1874; X64-LABEL: test_mm_mulhi_epu16:
1875; X64: # BB#0:
1876; X64-NEXT: pmulhuw %xmm1, %xmm0
1877; X64-NEXT: retq
1878 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1879 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1880 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1881 %bc = bitcast <8 x i16> %res to <2 x i64>
1882 ret <2 x i64> %bc
1883}
1884declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1885
1886define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1887; X32-LABEL: test_mm_mullo_epi16:
1888; X32: # BB#0:
1889; X32-NEXT: pmullw %xmm1, %xmm0
1890; X32-NEXT: retl
1891;
1892; X64-LABEL: test_mm_mullo_epi16:
1893; X64: # BB#0:
1894; X64-NEXT: pmullw %xmm1, %xmm0
1895; X64-NEXT: retq
1896 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1897 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1898 %res = mul <8 x i16> %arg0, %arg1
1899 %bc = bitcast <8 x i16> %res to <2 x i64>
1900 ret <2 x i64> %bc
1901}
1902
1903define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1904; X32-LABEL: test_mm_or_pd:
1905; X32: # BB#0:
1906; X32-NEXT: orps %xmm1, %xmm0
1907; X32-NEXT: retl
1908;
1909; X64-LABEL: test_mm_or_pd:
1910; X64: # BB#0:
1911; X64-NEXT: orps %xmm1, %xmm0
1912; X64-NEXT: retq
1913 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1914 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1915 %res = or <4 x i32> %arg0, %arg1
1916 %bc = bitcast <4 x i32> %res to <2 x double>
1917 ret <2 x double> %bc
1918}
1919
1920define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1921; X32-LABEL: test_mm_or_si128:
1922; X32: # BB#0:
1923; X32-NEXT: orps %xmm1, %xmm0
1924; X32-NEXT: retl
1925;
1926; X64-LABEL: test_mm_or_si128:
1927; X64: # BB#0:
1928; X64-NEXT: orps %xmm1, %xmm0
1929; X64-NEXT: retq
1930 %res = or <2 x i64> %a0, %a1
1931 ret <2 x i64> %res
1932}
1933
1934define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1935; X32-LABEL: test_mm_packs_epi16:
1936; X32: # BB#0:
1937; X32-NEXT: packsswb %xmm1, %xmm0
1938; X32-NEXT: retl
1939;
1940; X64-LABEL: test_mm_packs_epi16:
1941; X64: # BB#0:
1942; X64-NEXT: packsswb %xmm1, %xmm0
1943; X64-NEXT: retq
1944 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1945 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1946 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1947 %bc = bitcast <16 x i8> %res to <2 x i64>
1948 ret <2 x i64> %bc
1949}
1950declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1951
1952define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1953; X32-LABEL: test_mm_packs_epi32:
1954; X32: # BB#0:
1955; X32-NEXT: packssdw %xmm1, %xmm0
1956; X32-NEXT: retl
1957;
1958; X64-LABEL: test_mm_packs_epi32:
1959; X64: # BB#0:
1960; X64-NEXT: packssdw %xmm1, %xmm0
1961; X64-NEXT: retq
1962 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1963 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1964 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
1965 %bc = bitcast <8 x i16> %res to <2 x i64>
1966 ret <2 x i64> %bc
1967}
1968declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
1969
1970define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1971; X32-LABEL: test_mm_packus_epi16:
1972; X32: # BB#0:
1973; X32-NEXT: packuswb %xmm1, %xmm0
1974; X32-NEXT: retl
1975;
1976; X64-LABEL: test_mm_packus_epi16:
1977; X64: # BB#0:
1978; X64-NEXT: packuswb %xmm1, %xmm0
1979; X64-NEXT: retq
1980 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1981 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1982 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1983 %bc = bitcast <16 x i8> %res to <2 x i64>
1984 ret <2 x i64> %bc
1985}
1986declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1987
1988define void @test_mm_pause() nounwind {
1989; X32-LABEL: test_mm_pause:
1990; X32: # BB#0:
1991; X32-NEXT: pause
1992; X32-NEXT: retl
1993;
1994; X64-LABEL: test_mm_pause:
1995; X64: # BB#0:
1996; X64-NEXT: pause
1997; X64-NEXT: retq
1998 call void @llvm.x86.sse2.pause()
1999 ret void
2000}
2001declare void @llvm.x86.sse2.pause() nounwind readnone
2002
2003define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2004; X32-LABEL: test_mm_sad_epu8:
2005; X32: # BB#0:
2006; X32-NEXT: psadbw %xmm1, %xmm0
2007; X32-NEXT: retl
2008;
2009; X64-LABEL: test_mm_sad_epu8:
2010; X64: # BB#0:
2011; X64-NEXT: psadbw %xmm1, %xmm0
2012; X64-NEXT: retq
2013 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2014 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2015 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2016 ret <2 x i64> %res
2017}
2018declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2019
Simon Pilgrim01809e02016-05-19 10:58:54 +00002020define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2021; X32-LABEL: test_mm_set_epi8:
2022; X32: # BB#0:
2023; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2024; X32-NEXT: movd %eax, %xmm0
2025; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2026; X32-NEXT: movd %eax, %xmm1
2027; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2028; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2029; X32-NEXT: movd %eax, %xmm0
2030; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2031; X32-NEXT: movd %eax, %xmm2
2032; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2033; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2034; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2035; X32-NEXT: movd %eax, %xmm0
2036; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2037; X32-NEXT: movd %eax, %xmm3
2038; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2039; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2040; X32-NEXT: movd %eax, %xmm0
2041; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2042; X32-NEXT: movd %eax, %xmm1
2043; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2044; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2045; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2046; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2047; X32-NEXT: movd %eax, %xmm0
2048; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2049; X32-NEXT: movd %eax, %xmm2
2050; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2051; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2052; X32-NEXT: movd %eax, %xmm0
2053; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2054; X32-NEXT: movd %eax, %xmm3
2055; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2056; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2057; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2058; X32-NEXT: movd %eax, %xmm0
2059; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2060; X32-NEXT: movd %eax, %xmm2
2061; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2062; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2063; X32-NEXT: movd %eax, %xmm4
2064; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2065; X32-NEXT: movd %eax, %xmm0
2066; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2067; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2068; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2069; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2070; X32-NEXT: retl
2071;
2072; X64-LABEL: test_mm_set_epi8:
2073; X64: # BB#0:
2074; X64-NEXT: movzbl %dil, %eax
2075; X64-NEXT: movd %eax, %xmm0
2076; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2077; X64-NEXT: movd %eax, %xmm1
2078; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2079; X64-NEXT: movzbl %r8b, %eax
2080; X64-NEXT: movd %eax, %xmm0
2081; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2082; X64-NEXT: movd %eax, %xmm2
2083; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2084; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2085; X64-NEXT: movzbl %dl, %eax
2086; X64-NEXT: movd %eax, %xmm0
2087; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2088; X64-NEXT: movd %eax, %xmm3
2089; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2090; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2091; X64-NEXT: movd %eax, %xmm0
2092; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2093; X64-NEXT: movd %eax, %xmm1
2094; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2095; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2096; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2097; X64-NEXT: movzbl %sil, %eax
2098; X64-NEXT: movd %eax, %xmm0
2099; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2100; X64-NEXT: movd %eax, %xmm2
2101; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2102; X64-NEXT: movzbl %r9b, %eax
2103; X64-NEXT: movd %eax, %xmm0
2104; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2105; X64-NEXT: movd %eax, %xmm3
2106; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2107; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2108; X64-NEXT: movzbl %cl, %eax
2109; X64-NEXT: movd %eax, %xmm0
2110; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2111; X64-NEXT: movd %eax, %xmm2
2112; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2113; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2114; X64-NEXT: movd %eax, %xmm4
2115; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2116; X64-NEXT: movd %eax, %xmm0
2117; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2118; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2119; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2120; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2121; X64-NEXT: retq
2122 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2123 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2124 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2125 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2126 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2127 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2128 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2129 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2130 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2131 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2132 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2133 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2134 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2135 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2136 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2137 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2138 %res = bitcast <16 x i8> %res15 to <2 x i64>
2139 ret <2 x i64> %res
2140}
2141
2142define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2143; X32-LABEL: test_mm_set_epi16:
2144; X32: # BB#0:
2145; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2146; X32-NEXT: movd %eax, %xmm1
2147; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2148; X32-NEXT: movd %eax, %xmm2
2149; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2150; X32-NEXT: movd %eax, %xmm3
2151; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2152; X32-NEXT: movd %eax, %xmm4
2153; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2154; X32-NEXT: movd %eax, %xmm5
2155; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2156; X32-NEXT: movd %eax, %xmm6
2157; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2158; X32-NEXT: movd %eax, %xmm7
2159; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2160; X32-NEXT: movd %eax, %xmm0
2161; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2162; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2163; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2164; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2165; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2166; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2167; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2168; X32-NEXT: retl
2169;
2170; X64-LABEL: test_mm_set_epi16:
2171; X64: # BB#0:
2172; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2173; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2174; X64-NEXT: movd %edi, %xmm0
2175; X64-NEXT: movd %r8d, %xmm1
2176; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2177; X64-NEXT: movd %edx, %xmm0
2178; X64-NEXT: movd %eax, %xmm2
2179; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2180; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2181; X64-NEXT: movd %esi, %xmm0
2182; X64-NEXT: movd %r9d, %xmm1
2183; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2184; X64-NEXT: movd %ecx, %xmm3
2185; X64-NEXT: movd %r10d, %xmm0
2186; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2187; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2188; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2189; X64-NEXT: retq
2190 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2191 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2192 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2193 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2194 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2195 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2196 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2197 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2198 %res = bitcast <8 x i16> %res7 to <2 x i64>
2199 ret <2 x i64> %res
2200}
2201
2202define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2203; X32-LABEL: test_mm_set_epi32:
2204; X32: # BB#0:
2205; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2206; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2207; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2208; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2209; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2210; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2211; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2212; X32-NEXT: retl
2213;
2214; X64-LABEL: test_mm_set_epi32:
2215; X64: # BB#0:
2216; X64-NEXT: movd %edi, %xmm0
2217; X64-NEXT: movd %edx, %xmm1
2218; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2219; X64-NEXT: movd %esi, %xmm2
2220; X64-NEXT: movd %ecx, %xmm0
2221; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2222; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2223; X64-NEXT: retq
2224 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2225 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2226 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2227 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2228 %res = bitcast <4 x i32> %res3 to <2 x i64>
2229 ret <2 x i64> %res
2230}
2231
2232; TODO test_mm_set_epi64
2233
2234define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2235; X32-LABEL: test_mm_set_epi64x:
2236; X32: # BB#0:
2237; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2238; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2239; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2240; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2241; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2242; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2243; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2244; X32-NEXT: retl
2245;
2246; X64-LABEL: test_mm_set_epi64x:
2247; X64: # BB#0:
2248; X64-NEXT: movd %rdi, %xmm1
2249; X64-NEXT: movd %rsi, %xmm0
2250; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2251; X64-NEXT: retq
2252 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2253 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2254 ret <2 x i64> %res1
2255}
2256
2257define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2258; X32-LABEL: test_mm_set_pd:
2259; X32: # BB#0:
2260; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2261; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2262; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2263; X32-NEXT: retl
2264;
2265; X64-LABEL: test_mm_set_pd:
2266; X64: # BB#0:
2267; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2268; X64-NEXT: movapd %xmm1, %xmm0
2269; X64-NEXT: retq
2270 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2271 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2272 ret <2 x double> %res1
2273}
2274
2275define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2276; X32-LABEL: test_mm_set_sd:
2277; X32: # BB#0:
2278; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2279; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2280; X32-NEXT: retl
2281;
2282; X64-LABEL: test_mm_set_sd:
2283; X64: # BB#0:
2284; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2285; X64-NEXT: retq
2286 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2287 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2288 ret <2 x double> %res1
2289}
2290
2291define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2292; X32-LABEL: test_mm_set1_epi8:
2293; X32: # BB#0:
2294; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2295; X32-NEXT: movd %eax, %xmm0
2296; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2297; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2298; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2299; X32-NEXT: retl
2300;
2301; X64-LABEL: test_mm_set1_epi8:
2302; X64: # BB#0:
2303; X64-NEXT: movzbl %dil, %eax
2304; X64-NEXT: movd %eax, %xmm0
2305; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2306; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2307; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2308; X64-NEXT: retq
2309 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2310 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2311 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2312 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2313 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2314 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2315 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2316 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2317 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2318 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2319 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2320 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2321 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2322 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2323 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2324 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2325 %res = bitcast <16 x i8> %res15 to <2 x i64>
2326 ret <2 x i64> %res
2327}
2328
2329define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2330; X32-LABEL: test_mm_set1_epi16:
2331; X32: # BB#0:
2332; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2333; X32-NEXT: movd %eax, %xmm0
2334; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2335; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2336; X32-NEXT: retl
2337;
2338; X64-LABEL: test_mm_set1_epi16:
2339; X64: # BB#0:
2340; X64-NEXT: movd %edi, %xmm0
2341; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2342; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2343; X64-NEXT: retq
2344 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2345 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2346 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2347 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2348 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2349 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2350 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2351 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2352 %res = bitcast <8 x i16> %res7 to <2 x i64>
2353 ret <2 x i64> %res
2354}
2355
2356define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2357; X32-LABEL: test_mm_set1_epi32:
2358; X32: # BB#0:
2359; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2360; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2361; X32-NEXT: retl
2362;
2363; X64-LABEL: test_mm_set1_epi32:
2364; X64: # BB#0:
2365; X64-NEXT: movd %edi, %xmm0
2366; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2367; X64-NEXT: retq
2368 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2369 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2370 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2371 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2372 %res = bitcast <4 x i32> %res3 to <2 x i64>
2373 ret <2 x i64> %res
2374}
2375
2376; TODO test_mm_set1_epi64
2377
2378define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2379; X32-LABEL: test_mm_set1_epi64x:
2380; X32: # BB#0:
2381; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2382; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2383; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2384; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2385; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2386; X32-NEXT: retl
2387;
2388; X64-LABEL: test_mm_set1_epi64x:
2389; X64: # BB#0:
2390; X64-NEXT: movd %rdi, %xmm0
2391; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2392; X64-NEXT: retq
2393 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2394 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2395 ret <2 x i64> %res1
2396}
2397
2398define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2399; X32-LABEL: test_mm_set1_pd:
2400; X32: # BB#0:
2401; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2402; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2403; X32-NEXT: retl
2404;
2405; X64-LABEL: test_mm_set1_pd:
2406; X64: # BB#0:
2407; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2408; X64-NEXT: retq
2409 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2410 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2411 ret <2 x double> %res1
2412}
2413
2414define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2415; X32-LABEL: test_mm_setr_epi8:
2416; X32: # BB#0:
2417; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2418; X32-NEXT: movd %eax, %xmm0
2419; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2420; X32-NEXT: movd %eax, %xmm1
2421; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2422; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2423; X32-NEXT: movd %eax, %xmm0
2424; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2425; X32-NEXT: movd %eax, %xmm2
2426; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2427; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2428; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2429; X32-NEXT: movd %eax, %xmm0
2430; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2431; X32-NEXT: movd %eax, %xmm3
2432; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2433; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2434; X32-NEXT: movd %eax, %xmm0
2435; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2436; X32-NEXT: movd %eax, %xmm1
2437; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2438; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2439; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2440; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2441; X32-NEXT: movd %eax, %xmm0
2442; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2443; X32-NEXT: movd %eax, %xmm2
2444; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2445; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2446; X32-NEXT: movd %eax, %xmm0
2447; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2448; X32-NEXT: movd %eax, %xmm3
2449; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2450; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2451; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2452; X32-NEXT: movd %eax, %xmm0
2453; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2454; X32-NEXT: movd %eax, %xmm2
2455; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2456; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2457; X32-NEXT: movd %eax, %xmm4
2458; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2459; X32-NEXT: movd %eax, %xmm0
2460; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2461; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2462; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2463; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2464; X32-NEXT: retl
2465;
2466; X64-LABEL: test_mm_setr_epi8:
2467; X64: # BB#0:
2468; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2469; X64-NEXT: movd %eax, %xmm0
2470; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2471; X64-NEXT: movd %eax, %xmm1
2472; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2473; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2474; X64-NEXT: movd %eax, %xmm0
2475; X64-NEXT: movzbl %cl, %eax
2476; X64-NEXT: movd %eax, %xmm2
2477; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2478; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2479; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2480; X64-NEXT: movd %eax, %xmm0
2481; X64-NEXT: movzbl %r9b, %eax
2482; X64-NEXT: movd %eax, %xmm3
2483; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2484; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2485; X64-NEXT: movd %eax, %xmm0
2486; X64-NEXT: movzbl %sil, %eax
2487; X64-NEXT: movd %eax, %xmm1
2488; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2489; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2490; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2491; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2492; X64-NEXT: movd %eax, %xmm0
2493; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2494; X64-NEXT: movd %eax, %xmm2
2495; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2496; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2497; X64-NEXT: movd %eax, %xmm0
2498; X64-NEXT: movzbl %dl, %eax
2499; X64-NEXT: movd %eax, %xmm3
2500; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2501; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2502; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2503; X64-NEXT: movd %eax, %xmm0
2504; X64-NEXT: movzbl %r8b, %eax
2505; X64-NEXT: movd %eax, %xmm2
2506; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2507; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2508; X64-NEXT: movd %eax, %xmm4
2509; X64-NEXT: movzbl %dil, %eax
2510; X64-NEXT: movd %eax, %xmm0
2511; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2512; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2513; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2514; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2515; X64-NEXT: retq
2516 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2517 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2518 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2519 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2520 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2521 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2522 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2523 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2524 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2525 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2526 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2527 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2528 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2529 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2530 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2531 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2532 %res = bitcast <16 x i8> %res15 to <2 x i64>
2533 ret <2 x i64> %res
2534}
2535
2536define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2537; X32-LABEL: test_mm_setr_epi16:
2538; X32: # BB#0:
2539; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2540; X32-NEXT: movd %eax, %xmm1
2541; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2542; X32-NEXT: movd %eax, %xmm2
2543; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2544; X32-NEXT: movd %eax, %xmm3
2545; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2546; X32-NEXT: movd %eax, %xmm4
2547; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2548; X32-NEXT: movd %eax, %xmm5
2549; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2550; X32-NEXT: movd %eax, %xmm6
2551; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2552; X32-NEXT: movd %eax, %xmm7
2553; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2554; X32-NEXT: movd %eax, %xmm0
2555; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2556; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2557; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2558; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2559; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2560; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2561; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2562; X32-NEXT: retl
2563;
2564; X64-LABEL: test_mm_setr_epi16:
2565; X64: # BB#0:
2566; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2567; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2568; X64-NEXT: movd %eax, %xmm0
2569; X64-NEXT: movd %ecx, %xmm1
2570; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2571; X64-NEXT: movd %r9d, %xmm0
2572; X64-NEXT: movd %esi, %xmm2
2573; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2574; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2575; X64-NEXT: movd %r10d, %xmm0
2576; X64-NEXT: movd %edx, %xmm1
2577; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2578; X64-NEXT: movd %r8d, %xmm3
2579; X64-NEXT: movd %edi, %xmm0
2580; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2581; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2582; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2583; X64-NEXT: retq
2584 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2585 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2586 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2587 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2588 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2589 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2590 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2591 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2592 %res = bitcast <8 x i16> %res7 to <2 x i64>
2593 ret <2 x i64> %res
2594}
2595
2596define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2597; X32-LABEL: test_mm_setr_epi32:
2598; X32: # BB#0:
2599; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2600; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2601; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2602; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2603; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2604; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2605; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2606; X32-NEXT: retl
2607;
2608; X64-LABEL: test_mm_setr_epi32:
2609; X64: # BB#0:
2610; X64-NEXT: movd %ecx, %xmm0
2611; X64-NEXT: movd %esi, %xmm1
2612; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2613; X64-NEXT: movd %edx, %xmm2
2614; X64-NEXT: movd %edi, %xmm0
2615; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2616; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2617; X64-NEXT: retq
2618 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2619 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2620 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2621 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2622 %res = bitcast <4 x i32> %res3 to <2 x i64>
2623 ret <2 x i64> %res
2624}
2625
2626; TODO test_mm_setr_epi64
2627
2628define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2629; X32-LABEL: test_mm_setr_epi64x:
2630; X32: # BB#0:
2631; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2632; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2633; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2634; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2635; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2636; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2637; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2638; X32-NEXT: retl
2639;
2640; X64-LABEL: test_mm_setr_epi64x:
2641; X64: # BB#0:
2642; X64-NEXT: movd %rsi, %xmm1
2643; X64-NEXT: movd %rdi, %xmm0
2644; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2645; X64-NEXT: retq
2646 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2647 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2648 ret <2 x i64> %res1
2649}
2650
2651define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2652; X32-LABEL: test_mm_setr_pd:
2653; X32: # BB#0:
2654; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2655; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2656; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2657; X32-NEXT: retl
2658;
2659; X64-LABEL: test_mm_setr_pd:
2660; X64: # BB#0:
2661; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2662; X64-NEXT: retq
2663 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2664 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2665 ret <2 x double> %res1
2666}
2667
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00002668define <2 x double> @test_mm_setzero_pd() {
2669; X32-LABEL: test_mm_setzero_pd:
2670; X32: # BB#0:
2671; X32-NEXT: xorps %xmm0, %xmm0
2672; X32-NEXT: retl
2673;
2674; X64-LABEL: test_mm_setzero_pd:
2675; X64: # BB#0:
2676; X64-NEXT: xorps %xmm0, %xmm0
2677; X64-NEXT: retq
2678 ret <2 x double> zeroinitializer
2679}
2680
2681define <2 x i64> @test_mm_setzero_si128() {
2682; X32-LABEL: test_mm_setzero_si128:
2683; X32: # BB#0:
2684; X32-NEXT: xorps %xmm0, %xmm0
2685; X32-NEXT: retl
2686;
2687; X64-LABEL: test_mm_setzero_si128:
2688; X64: # BB#0:
2689; X64-NEXT: xorps %xmm0, %xmm0
2690; X64-NEXT: retq
2691 ret <2 x i64> zeroinitializer
2692}
2693
2694define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2695; X32-LABEL: test_mm_shuffle_epi32:
2696; X32: # BB#0:
2697; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2698; X32-NEXT: retl
2699;
2700; X64-LABEL: test_mm_shuffle_epi32:
2701; X64: # BB#0:
2702; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2703; X64-NEXT: retq
2704 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2705 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2706 %bc = bitcast <4 x i32> %res to <2 x i64>
2707 ret <2 x i64> %bc
2708}
2709
2710define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2711; X32-LABEL: test_mm_shuffle_pd:
2712; X32: # BB#0:
2713; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2714; X32-NEXT: retl
2715;
2716; X64-LABEL: test_mm_shuffle_pd:
2717; X64: # BB#0:
2718; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2719; X64-NEXT: retq
2720 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2721 ret <2 x double> %res
2722}
2723
2724define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2725; X32-LABEL: test_mm_shufflehi_epi16:
2726; X32: # BB#0:
2727; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2728; X32-NEXT: retl
2729;
2730; X64-LABEL: test_mm_shufflehi_epi16:
2731; X64: # BB#0:
2732; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2733; X64-NEXT: retq
2734 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2735 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2736 %bc = bitcast <8 x i16> %res to <2 x i64>
2737 ret <2 x i64> %bc
2738}
2739
2740define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2741; X32-LABEL: test_mm_shufflelo_epi16:
2742; X32: # BB#0:
2743; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2744; X32-NEXT: retl
2745;
2746; X64-LABEL: test_mm_shufflelo_epi16:
2747; X64: # BB#0:
2748; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2749; X64-NEXT: retq
2750 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2751 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2752 %bc = bitcast <8 x i16> %res to <2 x i64>
2753 ret <2 x i64> %bc
2754}
2755
2756define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2757; X32-LABEL: test_mm_sll_epi16:
2758; X32: # BB#0:
2759; X32-NEXT: psllw %xmm1, %xmm0
2760; X32-NEXT: retl
2761;
2762; X64-LABEL: test_mm_sll_epi16:
2763; X64: # BB#0:
2764; X64-NEXT: psllw %xmm1, %xmm0
2765; X64-NEXT: retq
2766 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2767 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2768 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2769 %bc = bitcast <8 x i16> %res to <2 x i64>
2770 ret <2 x i64> %bc
2771}
2772declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2773
2774define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2775; X32-LABEL: test_mm_sll_epi32:
2776; X32: # BB#0:
2777; X32-NEXT: pslld %xmm1, %xmm0
2778; X32-NEXT: retl
2779;
2780; X64-LABEL: test_mm_sll_epi32:
2781; X64: # BB#0:
2782; X64-NEXT: pslld %xmm1, %xmm0
2783; X64-NEXT: retq
2784 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2785 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2786 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2787 %bc = bitcast <4 x i32> %res to <2 x i64>
2788 ret <2 x i64> %bc
2789}
2790declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2791
2792define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2793; X32-LABEL: test_mm_sll_epi64:
2794; X32: # BB#0:
2795; X32-NEXT: psllq %xmm1, %xmm0
2796; X32-NEXT: retl
2797;
2798; X64-LABEL: test_mm_sll_epi64:
2799; X64: # BB#0:
2800; X64-NEXT: psllq %xmm1, %xmm0
2801; X64-NEXT: retq
2802 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2803 ret <2 x i64> %res
2804}
2805declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2806
2807define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2808; X32-LABEL: test_mm_slli_epi16:
2809; X32: # BB#0:
2810; X32-NEXT: psllw $1, %xmm0
2811; X32-NEXT: retl
2812;
2813; X64-LABEL: test_mm_slli_epi16:
2814; X64: # BB#0:
2815; X64-NEXT: psllw $1, %xmm0
2816; X64-NEXT: retq
2817 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2818 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2819 %bc = bitcast <8 x i16> %res to <2 x i64>
2820 ret <2 x i64> %bc
2821}
2822declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2823
2824define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2825; X32-LABEL: test_mm_slli_epi32:
2826; X32: # BB#0:
2827; X32-NEXT: pslld $1, %xmm0
2828; X32-NEXT: retl
2829;
2830; X64-LABEL: test_mm_slli_epi32:
2831; X64: # BB#0:
2832; X64-NEXT: pslld $1, %xmm0
2833; X64-NEXT: retq
2834 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2835 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2836 %bc = bitcast <4 x i32> %res to <2 x i64>
2837 ret <2 x i64> %bc
2838}
2839declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2840
2841define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2842; X32-LABEL: test_mm_slli_epi64:
2843; X32: # BB#0:
2844; X32-NEXT: psllq $1, %xmm0
2845; X32-NEXT: retl
2846;
2847; X64-LABEL: test_mm_slli_epi64:
2848; X64: # BB#0:
2849; X64-NEXT: psllq $1, %xmm0
2850; X64-NEXT: retq
2851 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2852 ret <2 x i64> %res
2853}
2854declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2855
2856define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2857; X32-LABEL: test_mm_slli_si128:
2858; X32: # BB#0:
2859; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2860; X32-NEXT: retl
2861;
2862; X64-LABEL: test_mm_slli_si128:
2863; X64: # BB#0:
2864; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2865; X64-NEXT: retq
2866 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2867 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2868 %bc = bitcast <16 x i8> %res to <2 x i64>
2869 ret <2 x i64> %bc
2870}
2871
2872define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2873; X32-LABEL: test_mm_sqrt_pd:
2874; X32: # BB#0:
2875; X32-NEXT: sqrtpd %xmm0, %xmm0
2876; X32-NEXT: retl
2877;
2878; X64-LABEL: test_mm_sqrt_pd:
2879; X64: # BB#0:
2880; X64-NEXT: sqrtpd %xmm0, %xmm0
2881; X64-NEXT: retq
2882 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2883 ret <2 x double> %res
2884}
2885declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2886
2887define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2888; X32-LABEL: test_mm_sqrt_sd:
2889; X32: # BB#0:
2890; X32-NEXT: sqrtsd %xmm0, %xmm1
2891; X32-NEXT: movaps %xmm1, %xmm0
2892; X32-NEXT: retl
2893;
2894; X64-LABEL: test_mm_sqrt_sd:
2895; X64: # BB#0:
2896; X64-NEXT: sqrtsd %xmm0, %xmm1
2897; X64-NEXT: movaps %xmm1, %xmm0
2898; X64-NEXT: retq
2899 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2900 %ext0 = extractelement <2 x double> %call, i32 0
2901 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2902 %ext1 = extractelement <2 x double> %a1, i32 1
2903 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2904 ret <2 x double> %ins1
2905}
2906declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2907
2908define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2909; X32-LABEL: test_mm_sra_epi16:
2910; X32: # BB#0:
2911; X32-NEXT: psraw %xmm1, %xmm0
2912; X32-NEXT: retl
2913;
2914; X64-LABEL: test_mm_sra_epi16:
2915; X64: # BB#0:
2916; X64-NEXT: psraw %xmm1, %xmm0
2917; X64-NEXT: retq
2918 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2919 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2920 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2921 %bc = bitcast <8 x i16> %res to <2 x i64>
2922 ret <2 x i64> %bc
2923}
2924declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2925
2926define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2927; X32-LABEL: test_mm_sra_epi32:
2928; X32: # BB#0:
2929; X32-NEXT: psrad %xmm1, %xmm0
2930; X32-NEXT: retl
2931;
2932; X64-LABEL: test_mm_sra_epi32:
2933; X64: # BB#0:
2934; X64-NEXT: psrad %xmm1, %xmm0
2935; X64-NEXT: retq
2936 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2937 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2938 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2939 %bc = bitcast <4 x i32> %res to <2 x i64>
2940 ret <2 x i64> %bc
2941}
2942declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2943
2944define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2945; X32-LABEL: test_mm_srai_epi16:
2946; X32: # BB#0:
2947; X32-NEXT: psraw $1, %xmm0
2948; X32-NEXT: retl
2949;
2950; X64-LABEL: test_mm_srai_epi16:
2951; X64: # BB#0:
2952; X64-NEXT: psraw $1, %xmm0
2953; X64-NEXT: retq
2954 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2955 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
2956 %bc = bitcast <8 x i16> %res to <2 x i64>
2957 ret <2 x i64> %bc
2958}
2959declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
2960
2961define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
2962; X32-LABEL: test_mm_srai_epi32:
2963; X32: # BB#0:
2964; X32-NEXT: psrad $1, %xmm0
2965; X32-NEXT: retl
2966;
2967; X64-LABEL: test_mm_srai_epi32:
2968; X64: # BB#0:
2969; X64-NEXT: psrad $1, %xmm0
2970; X64-NEXT: retq
2971 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2972 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
2973 %bc = bitcast <4 x i32> %res to <2 x i64>
2974 ret <2 x i64> %bc
2975}
2976declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
2977
2978define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2979; X32-LABEL: test_mm_srl_epi16:
2980; X32: # BB#0:
2981; X32-NEXT: psrlw %xmm1, %xmm0
2982; X32-NEXT: retl
2983;
2984; X64-LABEL: test_mm_srl_epi16:
2985; X64: # BB#0:
2986; X64-NEXT: psrlw %xmm1, %xmm0
2987; X64-NEXT: retq
2988 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2989 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2990 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
2991 %bc = bitcast <8 x i16> %res to <2 x i64>
2992 ret <2 x i64> %bc
2993}
2994declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
2995
2996define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2997; X32-LABEL: test_mm_srl_epi32:
2998; X32: # BB#0:
2999; X32-NEXT: psrld %xmm1, %xmm0
3000; X32-NEXT: retl
3001;
3002; X64-LABEL: test_mm_srl_epi32:
3003; X64: # BB#0:
3004; X64-NEXT: psrld %xmm1, %xmm0
3005; X64-NEXT: retq
3006 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3007 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3008 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3009 %bc = bitcast <4 x i32> %res to <2 x i64>
3010 ret <2 x i64> %bc
3011}
3012declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3013
3014define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3015; X32-LABEL: test_mm_srl_epi64:
3016; X32: # BB#0:
3017; X32-NEXT: psrlq %xmm1, %xmm0
3018; X32-NEXT: retl
3019;
3020; X64-LABEL: test_mm_srl_epi64:
3021; X64: # BB#0:
3022; X64-NEXT: psrlq %xmm1, %xmm0
3023; X64-NEXT: retq
3024 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3025 ret <2 x i64> %res
3026}
3027declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3028
3029define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3030; X32-LABEL: test_mm_srli_epi16:
3031; X32: # BB#0:
3032; X32-NEXT: psrlw $1, %xmm0
3033; X32-NEXT: retl
3034;
3035; X64-LABEL: test_mm_srli_epi16:
3036; X64: # BB#0:
3037; X64-NEXT: psrlw $1, %xmm0
3038; X64-NEXT: retq
3039 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3040 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3041 %bc = bitcast <8 x i16> %res to <2 x i64>
3042 ret <2 x i64> %bc
3043}
3044declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3045
3046define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3047; X32-LABEL: test_mm_srli_epi32:
3048; X32: # BB#0:
3049; X32-NEXT: psrld $1, %xmm0
3050; X32-NEXT: retl
3051;
3052; X64-LABEL: test_mm_srli_epi32:
3053; X64: # BB#0:
3054; X64-NEXT: psrld $1, %xmm0
3055; X64-NEXT: retq
3056 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3057 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3058 %bc = bitcast <4 x i32> %res to <2 x i64>
3059 ret <2 x i64> %bc
3060}
3061declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3062
3063define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3064; X32-LABEL: test_mm_srli_epi64:
3065; X32: # BB#0:
3066; X32-NEXT: psrlq $1, %xmm0
3067; X32-NEXT: retl
3068;
3069; X64-LABEL: test_mm_srli_epi64:
3070; X64: # BB#0:
3071; X64-NEXT: psrlq $1, %xmm0
3072; X64-NEXT: retq
3073 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3074 ret <2 x i64> %res
3075}
3076declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3077
3078define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3079; X32-LABEL: test_mm_srli_si128:
3080; X32: # BB#0:
3081; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3082; X32-NEXT: retl
3083;
3084; X64-LABEL: test_mm_srli_si128:
3085; X64: # BB#0:
3086; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3087; X64-NEXT: retq
3088 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3089 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3090 %bc = bitcast <16 x i8> %res to <2 x i64>
3091 ret <2 x i64> %bc
3092}
3093
3094define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3095; X32-LABEL: test_mm_store_pd:
3096; X32: # BB#0:
3097; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3098; X32-NEXT: movaps %xmm0, (%eax)
3099; X32-NEXT: retl
3100;
3101; X64-LABEL: test_mm_store_pd:
3102; X64: # BB#0:
3103; X64-NEXT: movaps %xmm0, (%rdi)
3104; X64-NEXT: retq
3105 %arg0 = bitcast double* %a0 to <2 x double>*
3106 store <2 x double> %a1, <2 x double>* %arg0, align 16
3107 ret void
3108}
3109
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003110define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
3111; X32-LABEL: test_mm_store_pd1:
3112; X32: # BB#0:
3113; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3114; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3115; X32-NEXT: movaps %xmm0, (%eax)
3116; X32-NEXT: retl
3117;
3118; X64-LABEL: test_mm_store_pd1:
3119; X64: # BB#0:
3120; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3121; X64-NEXT: movaps %xmm0, (%rdi)
3122; X64-NEXT: retq
3123 %arg0 = bitcast double * %a0 to <2 x double>*
3124 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3125 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3126 ret void
3127}
3128
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003129define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3130; X32-LABEL: test_mm_store_sd:
3131; X32: # BB#0:
3132; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3133; X32-NEXT: movsd %xmm0, (%eax)
3134; X32-NEXT: retl
3135;
3136; X64-LABEL: test_mm_store_sd:
3137; X64: # BB#0:
3138; X64-NEXT: movsd %xmm0, (%rdi)
3139; X64-NEXT: retq
3140 %ext = extractelement <2 x double> %a1, i32 0
3141 store double %ext, double* %a0, align 1
3142 ret void
3143}
3144
3145define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3146; X32-LABEL: test_mm_store_si128:
3147; X32: # BB#0:
3148; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3149; X32-NEXT: movaps %xmm0, (%eax)
3150; X32-NEXT: retl
3151;
3152; X64-LABEL: test_mm_store_si128:
3153; X64: # BB#0:
3154; X64-NEXT: movaps %xmm0, (%rdi)
3155; X64-NEXT: retq
3156 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3157 ret void
3158}
3159
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003160define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3161; X32-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003162; X32: # BB#0:
3163; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003164; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3165; X32-NEXT: movaps %xmm0, (%eax)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003166; X32-NEXT: retl
3167;
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003168; X64-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003169; X64: # BB#0:
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003170; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3171; X64-NEXT: movaps %xmm0, (%rdi)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003172; X64-NEXT: retq
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003173 %arg0 = bitcast double * %a0 to <2 x double>*
3174 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3175 store <2 x double> %shuf, <2 x double>* %arg0, align 16
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003176 ret void
3177}
3178
3179define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3180; X32-LABEL: test_mm_storeh_sd:
3181; X32: # BB#0:
3182; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3183; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3184; X32-NEXT: movsd %xmm0, (%eax)
3185; X32-NEXT: retl
3186;
3187; X64-LABEL: test_mm_storeh_sd:
3188; X64: # BB#0:
3189; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3190; X64-NEXT: movsd %xmm0, (%rdi)
3191; X64-NEXT: retq
3192 %ext = extractelement <2 x double> %a1, i32 1
3193 store double %ext, double* %a0, align 8
3194 ret void
3195}
3196
3197define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3198; X32-LABEL: test_mm_storel_epi64:
3199; X32: # BB#0:
3200; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3201; X32-NEXT: movlps %xmm0, (%eax)
3202; X32-NEXT: retl
3203;
3204; X64-LABEL: test_mm_storel_epi64:
3205; X64: # BB#0:
3206; X64-NEXT: movd %xmm0, %rax
3207; X64-NEXT: movq %rax, (%rdi)
3208; X64-NEXT: retq
3209 %ext = extractelement <2 x i64> %a1, i32 0
3210 %bc = bitcast <2 x i64> *%a0 to i64*
3211 store i64 %ext, i64* %bc, align 8
3212 ret void
3213}
3214
3215define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3216; X32-LABEL: test_mm_storel_sd:
3217; X32: # BB#0:
3218; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3219; X32-NEXT: movsd %xmm0, (%eax)
3220; X32-NEXT: retl
3221;
3222; X64-LABEL: test_mm_storel_sd:
3223; X64: # BB#0:
3224; X64-NEXT: movsd %xmm0, (%rdi)
3225; X64-NEXT: retq
3226 %ext = extractelement <2 x double> %a1, i32 0
3227 store double %ext, double* %a0, align 8
3228 ret void
3229}
3230
3231define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3232; X32-LABEL: test_mm_storer_pd:
3233; X32: # BB#0:
3234; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3235; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3236; X32-NEXT: movapd %xmm0, (%eax)
3237; X32-NEXT: retl
3238;
3239; X64-LABEL: test_mm_storer_pd:
3240; X64: # BB#0:
3241; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3242; X64-NEXT: movapd %xmm0, (%rdi)
3243; X64-NEXT: retq
3244 %arg0 = bitcast double* %a0 to <2 x double>*
3245 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3246 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3247 ret void
3248}
3249
3250define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3251; X32-LABEL: test_mm_storeu_pd:
3252; X32: # BB#0:
3253; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3254; X32-NEXT: movups %xmm0, (%eax)
3255; X32-NEXT: retl
3256;
3257; X64-LABEL: test_mm_storeu_pd:
3258; X64: # BB#0:
3259; X64-NEXT: movups %xmm0, (%rdi)
3260; X64-NEXT: retq
Simon Pilgrimd64af652016-05-30 18:42:51 +00003261 %arg0 = bitcast double* %a0 to <2 x double>*
3262 store <2 x double> %a1, <2 x double>* %arg0, align 1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003263 ret void
3264}
3265
3266define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3267; X32-LABEL: test_mm_storeu_si128:
3268; X32: # BB#0:
3269; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3270; X32-NEXT: movups %xmm0, (%eax)
3271; X32-NEXT: retl
3272;
3273; X64-LABEL: test_mm_storeu_si128:
3274; X64: # BB#0:
3275; X64-NEXT: movups %xmm0, (%rdi)
3276; X64-NEXT: retq
Simon Pilgrimd64af652016-05-30 18:42:51 +00003277 store <2 x i64> %a1, <2 x i64>* %a0, align 1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003278 ret void
3279}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003280
3281define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3282; X32-LABEL: test_mm_stream_pd:
3283; X32: # BB#0:
3284; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3285; X32-NEXT: movntps %xmm0, (%eax)
3286; X32-NEXT: retl
3287;
3288; X64-LABEL: test_mm_stream_pd:
3289; X64: # BB#0:
3290; X64-NEXT: movntps %xmm0, (%rdi)
3291; X64-NEXT: retq
3292 %arg0 = bitcast double* %a0 to <2 x double>*
3293 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
3294 ret void
3295}
3296
3297define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3298; X32-LABEL: test_mm_stream_si32:
3299; X32: # BB#0:
3300; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3301; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3302; X32-NEXT: movntil %eax, (%ecx)
3303; X32-NEXT: retl
3304;
3305; X64-LABEL: test_mm_stream_si32:
3306; X64: # BB#0:
3307; X64-NEXT: movntil %esi, (%rdi)
3308; X64-NEXT: retq
3309 store i32 %a1, i32* %a0, align 1, !nontemporal !0
3310 ret void
3311}
3312
3313define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3314; X32-LABEL: test_mm_stream_si128:
3315; X32: # BB#0:
3316; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3317; X32-NEXT: movntps %xmm0, (%eax)
3318; X32-NEXT: retl
3319;
3320; X64-LABEL: test_mm_stream_si128:
3321; X64: # BB#0:
3322; X64-NEXT: movntps %xmm0, (%rdi)
3323; X64-NEXT: retq
3324 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
3325 ret void
3326}
3327
3328define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3329; X32-LABEL: test_mm_sub_epi8:
3330; X32: # BB#0:
3331; X32-NEXT: psubb %xmm1, %xmm0
3332; X32-NEXT: retl
3333;
3334; X64-LABEL: test_mm_sub_epi8:
3335; X64: # BB#0:
3336; X64-NEXT: psubb %xmm1, %xmm0
3337; X64-NEXT: retq
3338 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3339 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3340 %res = sub <16 x i8> %arg0, %arg1
3341 %bc = bitcast <16 x i8> %res to <2 x i64>
3342 ret <2 x i64> %bc
3343}
3344
3345define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3346; X32-LABEL: test_mm_sub_epi16:
3347; X32: # BB#0:
3348; X32-NEXT: psubw %xmm1, %xmm0
3349; X32-NEXT: retl
3350;
3351; X64-LABEL: test_mm_sub_epi16:
3352; X64: # BB#0:
3353; X64-NEXT: psubw %xmm1, %xmm0
3354; X64-NEXT: retq
3355 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3356 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3357 %res = sub <8 x i16> %arg0, %arg1
3358 %bc = bitcast <8 x i16> %res to <2 x i64>
3359 ret <2 x i64> %bc
3360}
3361
; _mm_sub_epi32: a generic 'sub <4 x i32>' on bitcast operands must select PSUBD.
define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi32:
; X32:       # BB#0:
; X32-NEXT:    psubd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sub_epi32:
; X64:       # BB#0:
; X64-NEXT:    psubd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = sub <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3378
; _mm_sub_epi64: subtraction on the native <2 x i64> type (no bitcasts needed)
; must select PSUBQ.
define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi64:
; X32:       # BB#0:
; X32-NEXT:    psubq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sub_epi64:
; X64:       # BB#0:
; X64-NEXT:    psubq %xmm1, %xmm0
; X64-NEXT:    retq
  %res = sub <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
3392
; _mm_sub_pd: a generic 'fsub <2 x double>' must select SUBPD.
define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_pd:
; X32:       # BB#0:
; X32-NEXT:    subpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sub_pd:
; X64:       # BB#0:
; X64-NEXT:    subpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = fsub <2 x double> %a0, %a1
  ret <2 x double> %res
}
3406
; _mm_sub_sd: the extract/scalar-fsub/insert-into-lane-0 idiom must fold into a
; single SUBSD (upper lane of %a0 passes through via the insertelement).
define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_sd:
; X32:       # BB#0:
; X32-NEXT:    subsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sub_sd:
; X64:       # BB#0:
; X64-NEXT:    subsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}
3423
; _mm_subs_epi8: the llvm.x86.sse2.psubs.b saturating-subtract intrinsic must
; select PSUBSB.
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi8:
; X32:       # BB#0:
; X32-NEXT:    psubsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epi8:
; X64:       # BB#0:
; X64-NEXT:    psubsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3441
; _mm_subs_epi16: the llvm.x86.sse2.psubs.w saturating-subtract intrinsic must
; select PSUBSW.
define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi16:
; X32:       # BB#0:
; X32-NEXT:    psubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epi16:
; X64:       # BB#0:
; X64-NEXT:    psubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3459
; _mm_subs_epu8: the llvm.x86.sse2.psubus.b unsigned-saturating-subtract
; intrinsic must select PSUBUSB.
define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu8:
; X32:       # BB#0:
; X32-NEXT:    psubusb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epu8:
; X64:       # BB#0:
; X64-NEXT:    psubusb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3477
; _mm_subs_epu16: the llvm.x86.sse2.psubus.w unsigned-saturating-subtract
; intrinsic must select PSUBUSW.
define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32:       # BB#0:
; X32-NEXT:    psubusw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64:       # BB#0:
; X64-NEXT:    psubusw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3495
; _mm_ucomieq_sd: UCOMISD followed by SETNP+SETE+ANDB — equality is reported
; only when ZF is set AND PF is clear (PF set means the compare was unordered).
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setnp %al
; X32-NEXT:    sete %cl
; X32-NEXT:    andb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setnp %al
; X64-NEXT:    sete %cl
; X64-NEXT:    andb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3518
; _mm_ucomige_sd: UCOMISD + SETAE, then zero-extend the flag byte to i32.
define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3537
; _mm_ucomigt_sd: UCOMISD + SETA, then zero-extend the flag byte to i32.
define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3556
; _mm_ucomile_sd: the compare operands are commuted (ucomisd %xmm0, %xmm1) so
; that <= can be tested with SETAE on the swapped comparison.
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm0, %xmm1
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm0, %xmm1
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3575
; _mm_ucomilt_sd: operands are commuted (ucomisd %xmm0, %xmm1) so that < can be
; tested with SETA on the swapped comparison.
define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm0, %xmm1
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm0, %xmm1
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3594
; _mm_ucomineq_sd: UCOMISD followed by SETP+SETNE+ORB — inequality is reported
; when ZF is clear OR the compare was unordered (PF set).
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setp %al
; X32-NEXT:    setne %cl
; X32-NEXT:    orb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setp %al
; X64-NEXT:    setne %cl
; X64-NEXT:    orb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3617
; _mm_undefined_pd: returning undef must generate no instructions beyond RET.
define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  ret <2 x double> undef
}
3628
; _mm_undefined_si128: returning undef must generate no instructions beyond RET.
define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  ret <2 x i64> undef
}
3639
; _mm_unpackhi_epi8: the interleaving shufflevector of high byte lanes
; (8,24,9,25,...) must select PUNPCKHBW.
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32:       # BB#0:
; X32-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64:       # BB#0:
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3656
; _mm_unpackhi_epi16: the interleaving shufflevector of high word lanes
; (4,12,5,13,...) must select PUNPCKHWD.
define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32:       # BB#0:
; X32-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64:       # BB#0:
; X64-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3673
; _mm_unpackhi_epi32: the interleaving shufflevector of high dword lanes
; (2,6,3,7) must select PUNPCKHDQ.
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3690
; _mm_unpackhi_epi64: the shufflevector selecting lanes <1,3> must select
; PUNPCKHQDQ.
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}
3704
; _mm_unpackhi_pd: the <2 x double> shufflevector selecting lanes <1,3> must
; select UNPCKHPD.
define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}
3718
; _mm_unpacklo_epi8: the interleaving shufflevector of low byte lanes
; (0,16,1,17,...) must select PUNPCKLBW.
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32:       # BB#0:
; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64:       # BB#0:
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3735
; _mm_unpacklo_epi16: the interleaving shufflevector of low word lanes
; (0,8,1,9,...) must select PUNPCKLWD.
define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32:       # BB#0:
; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64:       # BB#0:
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3752
; _mm_unpacklo_epi32: the interleaving shufflevector of low dword lanes
; (0,4,1,5) must select PUNPCKLDQ.
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32:       # BB#0:
; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64:       # BB#0:
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3769
; _mm_unpacklo_epi64: the shufflevector selecting lanes <0,2> must select
; PUNPCKLQDQ.
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32:       # BB#0:
; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64:       # BB#0:
; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}
3783
; _mm_unpacklo_pd: the <2 x double> shufflevector selecting lanes <0,2> must
; select UNPCKLPD.
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32:       # BB#0:
; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64:       # BB#0:
; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}
3797
; _mm_xor_pd: an integer xor on <4 x i32>-bitcast double operands; the CHECK
; lines accept the float-domain XORPS form that is currently selected.
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32:       # BB#0:
; X32-NEXT:    xorps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_xor_pd:
; X64:       # BB#0:
; X64-NEXT:    xorps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}
3814
; _mm_xor_si128: a generic 'xor <2 x i64>'; the CHECK lines accept the
; float-domain XORPS form that is currently selected.
define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32:       # BB#0:
; X32-NEXT:    xorps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_xor_si128:
; X64:       # BB#0:
; X64-NEXT:    xorps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
3828
; Metadata node attached as !nontemporal to the store in test_mm_stream_si128.
!0 = !{i32 1}
3830