; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=CHECK-SSE %s

; A: the shuffle mask fills lanes 0-3 from index 8 (element 0 of the undef
; second operand) and lanes 4-7 from elements 0-3 of %a, so the low half of %a
; ends up in the high half of the result. The checks below require a
; "vinsertf128 $1" with no vunpck* instruction matched before it.
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; B: <4 x double> variant. Lanes 0-1 take index 4 (element 0 of the undef second
; operand) and lanes 2-3 take elements 0-1 of %a. The checks below require a
; "vinsertf128 $1" with no vunpck* instruction matched before it.
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 1>
  ret <4 x double> %shuffle
}

; SSE2 min intrinsics called by @insert_crash below.
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

; Just check that no crash happens
; (the only thing verified is that the function's symbol is emitted; the check
; uses the CHECK-SSE prefix, so it is evaluated by the second RUN line only).
; CHECK-SSE: _insert_crash
define void @insert_crash() nounwind {
allocas:
  ; Take elements 2-3 of a zero <4 x double>, then narrow to <2 x double>.
  %v1.i.i451 = shufflevector <4 x double> zeroinitializer, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %ret_0a.i.i.i452 = shufflevector <4 x double> %v1.i.i451, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ; Feed the 128-bit value through the packed and scalar SSE2 min intrinsics.
  %vret_0.i.i.i454 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %ret_0a.i.i.i452, <2 x double> undef) nounwind
  %ret_val.i.i.i463 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %vret_0.i.i.i454, <2 x double> undef) nounwind
  ; Truncate element 0 to float and place it in lane 3 of an undef <4 x float>.
  %ret.i1.i.i464 = extractelement <2 x double> %ret_val.i.i.i463, i32 0
  %double2float = fptrunc double %ret.i1.i.i464 to float
  %smearinsert50 = insertelement <4 x float> undef, float %double2float, i32 3
  ; Store the bits as <4 x i32> through an undef pointer; only compilation matters.
  %blendAsInt.i503 = bitcast <4 x float> %smearinsert50 to <4 x i32>
  store <4 x i32> %blendAsInt.i503, <4 x i32>* undef, align 4
  ret void
}

;; DAG Combine must remove useless vinsertf128 instructions

; DAGCombineA: widen %v1 from 4 to 8 lanes (mask indices 4-7 select from the
; undef second operand), then extract lanes 0-3 again. The checks require that
; no "vinsertf128 $1" is matched after the function name.
; CHECK: DAGCombineA
; CHECK-NOT: vinsertf128 $1
define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
  %1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

; DAGCombineB: two chained <8 x i32> adds (%v1 + %v2, then + %v1). The checks
; expect xmm-width vpaddd instructions, with no "vinsertf128 $1" between the
; two vpaddd occurrences that are matched.
; CHECK: DAGCombineB
; CHECK: vpaddd %xmm
; CHECK-NOT: vinsertf128 $1
; CHECK: vpaddd %xmm
define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
  %1 = add <8 x i32> %v1, %v2
  %2 = add <8 x i32> %1, %v1
  ret <8 x i32> %2
}

; insert_pd: calls the vinsertf128.pd.256 intrinsic with both operands defined
; and immediate 0; a vinsertf128 instruction is expected in the output.
; CHECK: insert_pd
define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
; CHECK: vinsertf128
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
  ret <4 x double> %res
}

; insert_undef_pd: same intrinsic call as @insert_pd, but the 256-bit operand is
; undef (%a0 is unused). The check expects a plain register move
; "vmovaps %ymm1, %ymm0" in the output.
; CHECK: insert_undef_pd
define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone


; insert_ps: calls the vinsertf128.ps.256 intrinsic with both operands defined
; and immediate 0; a vinsertf128 instruction is expected in the output.
; CHECK: insert_ps
define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
; CHECK: vinsertf128
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
  ret <8 x float> %res
}

; insert_undef_ps: same intrinsic call as @insert_ps, but the 256-bit operand is
; undef (%a0 is unused). The check expects a plain register move
; "vmovaps %ymm1, %ymm0" in the output.
; CHECK: insert_undef_ps
define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone


; insert_si: calls the vinsertf128.si.256 intrinsic with both operands defined
; and immediate 0; a vinsertf128 instruction is expected in the output.
; CHECK: insert_si
define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vinsertf128
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
  ret <8 x i32> %res
}

; insert_undef_si: same intrinsic call as @insert_si, but the 256-bit operand is
; undef (%a0 is unused). The check expects a plain register move
; "vmovaps %ymm1, %ymm0" in the output.
; CHECK: insert_undef_si
define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; rdar://10643481
; vinsertf128_combine: loads a <4 x float> from %f advanced by 4 floats with
; 16-byte alignment and inserts it into an undef <8 x float> with immediate 1.
; The checks require a vinsertf128 with no vmovaps matched before it
; (presumably the load is expected to fold into the insert -- confirm against
; the referenced radar).
; CHECK: vinsertf128_combine
define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovaps
; CHECK: vinsertf128
entry:
  %add.ptr = getelementptr inbounds float* %f, i64 4
  %0 = bitcast float* %add.ptr to <4 x float>*
  %1 = load <4 x float>* %0, align 16
  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
  ret <8 x float> %2
}

; rdar://11076953
; vinsertf128_ucombine: same shape as the aligned variant of this test, but the
; <4 x float> load is only 8-byte aligned. The checks require a vinsertf128
; with no vmovups matched before it (presumably the unaligned load is expected
; to fold into the insert -- confirm against the referenced radar).
; CHECK: vinsertf128_ucombine
define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovups
; CHECK: vinsertf128
entry:
  %add.ptr = getelementptr inbounds float* %f, i64 4
  %0 = bitcast float* %add.ptr to <4 x float>*
  %1 = load <4 x float>* %0, align 8
  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
  ret <8 x float> %2
}