; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s

define internal i32 @callee1(i32 %A, i32 %B) {
  %C = sdiv i32 %A, %B
  ret i32 %C
}

define i32 @caller1() {
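; Inlining @callee1 substitutes the constant arguments, and the resulting
; sdiv i32 10, 3 folds to 3.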
; CHECK-LABEL: define i32 @caller1(
; CHECK-NEXT: ret i32 3

  %X = call i32 @callee1( i32 10, i32 3 )
  ret i32 %X
}

define i32 @caller2() {
; Check that we can constant-prop through instructions after inlining callee21
; to get constants in the inlined callsite to callee22.
; FIXME: Currently, the threshold is fixed at 20 because we don't perform
; *recursive* cost analysis to realize that the nested call site will definitely
; inline and be cheap. We should eventually do that and lower the threshold here
; to 1.
;
; CHECK-LABEL: @caller2(
; CHECK-NOT: call void @callee2
; CHECK: ret

  %x = call i32 @callee21(i32 42, i32 48)
  ret i32 %x
}

define i32 @callee21(i32 %x, i32 %y) {
  %sub = sub i32 %y, %x
  %result = call i32 @callee22(i32 %sub)
  ret i32 %result
}

declare i8* @getptr()

define i32 @callee22(i32 %x) {
  %icmp = icmp ugt i32 %x, 42
  br i1 %icmp, label %bb.true, label %bb.false
bb.true:
  ; This block mustn't be counted in the inline cost.
  %x1 = add i32 %x, 1
  %x2 = add i32 %x1, 1
  %x3 = add i32 %x2, 1
  %x4 = add i32 %x3, 1
  %x5 = add i32 %x4, 1
  %x6 = add i32 %x5, 1
  %x7 = add i32 %x6, 1
  %x8 = add i32 %x7, 1

  ret i32 %x8
bb.false:
  ret i32 %x
}

define i32 @caller3() {
; Check that even if the expensive path is hidden behind several basic blocks,
; it doesn't count toward the inline cost when constant-prop proves those paths
; dead.
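; Here %sub folds to 6 and the icmp against 42 folds to false, so only
; bb.false is live and the call site reduces to the constant 6.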
;
; CHECK-LABEL: @caller3(
; CHECK-NOT: call
; CHECK: ret i32 6

entry:
  %x = call i32 @callee3(i32 42, i32 48)
  ret i32 %x
}

define i32 @callee3(i32 %x, i32 %y) {
  %sub = sub i32 %y, %x
  %icmp = icmp ugt i32 %sub, 42
  br i1 %icmp, label %bb.true, label %bb.false

bb.true:
  %icmp2 = icmp ult i32 %sub, 64
  br i1 %icmp2, label %bb.true.true, label %bb.true.false

bb.true.true:
  ; This block mustn't be counted in the inline cost.
  %x1 = add i32 %x, 1
  %x2 = add i32 %x1, 1
  %x3 = add i32 %x2, 1
  %x4 = add i32 %x3, 1
  %x5 = add i32 %x4, 1
  %x6 = add i32 %x5, 1
  %x7 = add i32 %x6, 1
  %x8 = add i32 %x7, 1
  br label %bb.merge

bb.true.false:
  ; This block mustn't be counted in the inline cost.
  %y1 = add i32 %y, 1
  %y2 = add i32 %y1, 1
  %y3 = add i32 %y2, 1
  %y4 = add i32 %y3, 1
  %y5 = add i32 %y4, 1
  %y6 = add i32 %y5, 1
  %y7 = add i32 %y6, 1
  %y8 = add i32 %y7, 1
  br label %bb.merge

bb.merge:
  %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ]
  ret i32 %result

bb.false:
  ret i32 %sub
}

declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)

define i8 @caller4(i8 %z) {
; Check that we can constant fold through intrinsics such as the
; overflow-detecting arithmetic intrinsics. These are particularly important
; as they are used heavily in standard library code and generic C++ code where
; the arguments are often constant but complete generality is required.
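; With %x = 254 and %y = 14, the unsigned sum is 268, which wraps in i8, so
; the overflow bit folds to true and only the cheap bb.true path survives.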
121;
Stephen Linc1c7a132013-07-14 01:42:54 +0000122; CHECK-LABEL: @caller4(
Chandler Carruth753e21d2012-12-28 14:23:32 +0000123; CHECK-NOT: call
124; CHECK: ret i8 -1
125
126entry:
127 %x = call i8 @callee4(i8 254, i8 14, i8 %z)
128 ret i8 %x
129}
130
131define i8 @callee4(i8 %x, i8 %y, i8 %z) {
132 %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
133 %o = extractvalue {i8, i1} %uadd, 1
134 br i1 %o, label %bb.true, label %bb.false
135
136bb.true:
137 ret i8 -1
138
139bb.false:
140 ; This block musn't be counted in the inline cost.
141 %z1 = add i8 %z, 1
142 %z2 = add i8 %z1, 1
143 %z3 = add i8 %z2, 1
144 %z4 = add i8 %z3, 1
145 %z5 = add i8 %z4, 1
146 %z6 = add i8 %z5, 1
147 %z7 = add i8 %z6, 1
148 %z8 = add i8 %z7, 1
149 ret i8 %z8
150}
151
Chandler Carruth86ed5302012-12-28 14:43:42 +0000152define i64 @caller5(i64 %y) {
153; Check that we can round trip constants through various kinds of casts etc w/o
154; losing track of the constant prop in the inline cost analysis.
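; The constant 42 should survive the inttoptr/bitcast/ptrtoint/trunc/zext
; chain in @callee5, letting the icmp fold to true and bb.false go dead.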
;
; CHECK-LABEL: @caller5(
; CHECK-NOT: call
; CHECK: ret i64 -1

entry:
  %x = call i64 @callee5(i64 42, i64 %y)
  ret i64 %x
}

define i64 @callee5(i64 %x, i64 %y) {
  %inttoptr = inttoptr i64 %x to i8*
  %bitcast = bitcast i8* %inttoptr to i32*
  %ptrtoint = ptrtoint i32* %bitcast to i64
  %trunc = trunc i64 %ptrtoint to i32
  %zext = zext i32 %trunc to i64
  %cmp = icmp eq i64 %zext, 42
  br i1 %cmp, label %bb.true, label %bb.false

bb.true:
  ret i64 -1

bb.false:
  ; This block mustn't be counted in the inline cost.
  %y1 = add i64 %y, 1
  %y2 = add i64 %y1, 1
  %y3 = add i64 %y2, 1
  %y4 = add i64 %y3, 1
  %y5 = add i64 %y4, 1
  %y6 = add i64 %y5, 1
  %y7 = add i64 %y6, 1
  %y8 = add i64 %y7, 1
  ret i64 %y8
}


define i32 @PR13412.main() {
; This is a somewhat complicated three-layer subprogram that was reported to
; compute the wrong value for a branch because the analysis assumed, mid-inline,
; that one argument couldn't be equal to another pointer.
;
; After inlining, the branch should point directly to the exit block, not to
; the intermediate block.
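; Both arguments to @PR13412.first are the same pointer, so the pointer
; difference computed in @PR13412.second is 0, its %cmp folds to false, and it
; returns %b; the icmp eq %b, %b in @PR13412.first then folds to true, which
; is the branch condition checked below.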
; CHECK: @PR13412.main
; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]]
; CHECK: [[FALSE_DEST]]:
; CHECK-NEXT: call void @PR13412.fail()
; CHECK: [[TRUE_DEST]]:
; CHECK-NEXT: ret i32 0

entry:
  %i1 = alloca i64
  store i64 0, i64* %i1
  %arraydecay = bitcast i64* %i1 to i32*
  %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay)
  br i1 %call, label %cond.end, label %cond.false

cond.false:
  call void @PR13412.fail()
  br label %cond.end

cond.end:
  ret i32 0
}

define internal i1 @PR13412.first(i32* %a, i32* %b) {
entry:
  %call = call i32* @PR13412.second(i32* %a, i32* %b)
  %cmp = icmp eq i32* %call, %b
  ret i1 %cmp
}

declare void @PR13412.fail()

define internal i32* @PR13412.second(i32* %a, i32* %b) {
entry:
  %sub.ptr.lhs.cast = ptrtoint i32* %b to i64
  %sub.ptr.rhs.cast = ptrtoint i32* %a to i64
  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
  %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2
  %cmp = icmp ugt i64 %sub.ptr.div, 1
  br i1 %cmp, label %if.then, label %if.end3

if.then:
  %0 = load i32* %a
  %1 = load i32* %b
  %cmp1 = icmp eq i32 %0, %1
  br i1 %cmp1, label %return, label %if.end3

if.end3:
  br label %return

return:
  %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ]
  ret i32* %retval.0
}