; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
; Exercises the -loop-idiom pass: each function below is a loop whose expected
; handling (memset, memcpy, memset_pattern, or left untouched) is pinned by the
; FileCheck directives embedded in its body.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"

; Counted loop that stores zero to each of the first %Size bytes of %Base.
; Expected result: one memset of %Size bytes and no store left behind.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test1
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us: the store sits in the header block and the exit test
; lives in the separate latch block %for.body.cont.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test1a
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; Guarded loop storing the i32 constant 16843009 (0x01010101, i.e. every byte
; equal to 1) to consecutive elements of %Base. Expected result: the guard
; branch stays, the byte count is computed as %Size * 4, and the stores become
; a memset of the byte value 1.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK: @test2
; CHECK: br i1 %cmp10,
; CHECK: %0 = mul i64 %Size, 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, so we
; can't promote the loop to a memset.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  ;; Store through an unrelated pointer argument that may alias %Base.
  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test3
; CHECK-NOT: memset
; CHECK: ret void
}

;; TODO: We should be able to promote this memset. Not yet though.
;; The loop zeroes bytes [0, 100) of %Base while also storing to the fixed
;; address %Base + 1000 (named %Base100), which lies outside that range. The
;; expectations below use the CHECK-TODO prefix, which the FileCheck invocation
;; in this file does not enable, so they are currently inactive.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:
  %Base100 = getelementptr i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; Store beyond the range of the memset; should be safe to promote.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-TODO: @test4
; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
; CHECK-TODO-NOT: store
}

; This can't be promoted: the stored value is loop variant (it is the
; induction variable truncated to i8), so no memset may be formed.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar

  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test5
; CHECK-NOT: memset
; CHECK: ret void
}

;; memcpy formation: the loop copies %Size bytes, one at a time, from the
;; alloca %Base to the distinct alloca %Dest. Expected result: one memcpy call
;; and no store left behind.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test6
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us. Unlike @test1a, the header block here holds only the
; phi; the store and the exit test both live in the second block,
; %for.body.cont.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test7
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration (the
; exit condition compares the incremented induction variable against 1), so
; the original store is expected to survive.
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test8
; CHECK: store i64 0, i64* %PI
}

; Opaque external function; @test9 passes %Base to it and stores through the
; pointer it returns.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  ;; %Base escapes into the call, so the returned pointer may point into it.
  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test9
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset. The inner
; loop zeroes X[i*100 + j] for j in [0, 100) and the outer loop runs i over
; [0, 100), so the expected memset covers 10000 bytes and is placed in the
; entry block.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
; CHECK: @test10
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call. The stored i32 value 1 is not a repeated byte, so
; a plain memset cannot express it.
; rdar://9009151
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32* %P, i64 %indvar
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test11_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero. The loop writes 10000
; pointers, which are 64 bits wide under this file's datalayout, so the
; expected memset length is 80000 bytes.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test12
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Global whose address is the value stored by @test13_pattern.
@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call: every
; iteration stores the same constant pointer, the address of @G.
; rdar://9009151
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32** %P, i64 %indvar
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test13_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy. Each iteration loads @g_50[i + 4] and stores it to
; @g_50[i + 5], so the element written by one iteration is the one read by the
; next.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK: @test14
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp

}
