; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s

; Codegen tests for merging runs of adjacent scalar stores (and the loads that
; feed them) into wider memory operations. Both llc invocations above are
; checked against the same expectations.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Eight adjacent i8 fields: 8 bytes in total.
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
; Eight adjacent i32 fields: 32 bytes in total.
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; Each loop iteration writes the constants 1..8 to the eight i8 fields of one
; %struct.A. The eight byte stores are expected to become a single 64-bit
; immediate store: 578437695752307201 == 0x0807060504030201.
; CHECK-LABEL: merge_const_store:
; save 1,2,3 ... as one big integer.
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Each loop iteration zeroes the eight i32 fields of one %struct.B.
; No vectors because we use noimplicitfloat
; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Same body as the noimplicitfloat variant, but without that attribute, so a
; vector store is expected here.
; Move the constants using a single vector store.
; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the first 4 constants as a single 32-bit integer store
; (67305985 == 0x04030201). The fifth byte is the non-constant %zz, so it and
; the three constants after it are expected as four separate byte stores.
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Two adjacent i8 loads from %q feed two adjacent i8 stores into %p (both
; pointers are noalias). Both loads precede both stores, so each pair is
; expected to become one 16-bit access.
;CHECK-LABEL: merge_loads_i16:
; load:
;CHECK: movw
; store:
;CHECK: movw
;CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i8* %2, align 1
  %6 = load i8* %3, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved (load, store, load, store). Can't
; merge them, so two byte loads and two byte stores are expected.
;CHECK-LABEL: no_merge_loads:
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: movb
;CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                               ; preds = %a4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  %a5 = load i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  %a6 = load i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Two adjacent i32 loads from %q feed two adjacent i32 stores into %p; each
; pair is expected to become one 64-bit access.
;CHECK-LABEL: merge_loads_integer:
; load:
;CHECK: movq
; store:
;CHECK: movq
;CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i32* %2
  %6 = load i32* %3
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Four adjacent i32 loads from %q feed four adjacent i32 stores into %p; each
; group of four is expected to become one unaligned vector move.
;CHECK-LABEL: merge_loads_vector:
; load:
;CHECK: movups
; store:
;CHECK: movups
;CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2
  %b2 = load i32* %a3
  %b3 = load i32* %a4
  %b4 = load i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same access pattern as the vector case, but every load and store is marked
; align 1. The four copies are expected to stay as individual 32-bit moves.
;CHECK-LABEL: merge_loads_no_align:
; load:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
; store:
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: movl
;CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  %b1 = load i32* %a2, align 1
  %b2 = load i32* %a3, align 1
  %b3 = load i32* %a4, align 1
  %b4 = load i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; The two source bytes are c[idx] and c[idx + 1], where idx is an i64 loaded
; from %a; the two destination bytes are adjacent in %b.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i64* %.0, i64 1
  %3 = load i64* %.0, align 1
  %4 = getelementptr inbounds i8* %c, i64 %3
  %5 = load i8* %4, align 1
  %6 = add i64 %3, 1
  %7 = getelementptr inbounds i8* %c, i64 %6
  %8 = load i8* %7, align 1
  store i8 %5, i8* %.08, align 1
  %9 = getelementptr inbounds i8* %.08, i64 1
  store i8 %8, i8* %9, align 1
  %10 = getelementptr inbounds i8* %.08, i64 2
  %11 = add nsw i32 %.09, -1
  %12 = icmp eq i32 %11, 0
  br i1 %12, label %13, label %1

; <label>:13
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; Here the index is an i8 loaded from %a and sign-extended to i64 once; the
; +1 for the second address is added after the extension, in i64.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  %7 = add i64 %4, 1
  %8 = getelementptr inbounds i8* %c, i64 %7
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; However, we can only ignore sign extensions when they are applied to all of
; the address computations. Here the +1 for the second address is added in i8
; before the sign extension (so it may wrap), and the two byte loads must not
; be merged into a 16-bit load.
;
; The negative patterns below use plain regexes rather than a captured
; variable: a capture made inside a negative directive is never bound when
; that directive passes, so a later use of it could not match anything.
; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw (%{{.*}},%{{.*}}), %{{[a-z]+}}
; CHECK-NOT: movw %{{[a-z]+}}, (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8* %.0, i64 1
  %3 = load i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8* %c, i64 %4
  %6 = load i8* %5, align 1
  %7 = add i8 %3, 1
  %wrap.4 = sext i8 %7 to i64
  %8 = getelementptr inbounds i8* %c, i64 %wrap.4
  %9 = load i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}