; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s

; Codegen tests for merging runs of consecutive stores (and the loads that
; feed them) into wider memory operations on x86-64 with AVX enabled.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Eight consecutive one-byte fields (8 bytes in total).
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
; Eight consecutive four-byte fields (32 bytes in total).
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; Each loop iteration stores the constants 1..8 into the eight adjacent i8
; fields of one %struct.A element, then advances to the next element.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %p     - first %struct.A element to write.
; CHECK: merge_const_store
; save 1,2,3 ... as one big integer.
; 578437695752307201 == 0x0807060504030201, i.e. the bytes 1..8 laid out in
; little-endian order as a single 64-bit immediate.
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the element currently being written.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Advance the counter and step to the next %struct.A element.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the constants using a single vector store.
; Each loop iteration zeroes the eight adjacent i32 fields of one %struct.B
; element (32 bytes), which matches the width of one ymm register.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %p     - first %struct.B element to write.
; CHECK: merge_const_store_vec
; CHECK: vmovups %ymm0, (%rsi)
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the element currently being written.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Advance the counter and step to the next %struct.B element.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the first 4 constants as a single 32-bit integer. Move the rest as
; scalars.
; The fifth byte stored is the non-constant argument %zz, which interrupts the
; run of constants. 67305985 == 0x04030201, i.e. the bytes 1..4 in
; little-endian order. The four byte moves expected afterwards are presumably
; the stores of %zz and of the constants 6, 7 and 8.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %zz    - run-time value stored into field 4.
;   %p     - first %struct.A element to write.
; CHECK: merge_nonconst_store
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  ; %i.02 is the iteration counter, %.01 the element currently being written.
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Advance the counter and step to the next %struct.A element.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Two adjacent i8 loads from %q feed two adjacent i8 stores into %p; the
; expectation is one 16-bit load and one 16-bit store.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %q     - source element (noalias); only fields 0 and 1 are read.
;   %p     - first destination element (noalias).
; CHECK: merge_loads_i16
; load:
; CHECK: movw
; store:
; CHECK: movw
; CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; The source addresses do not change between iterations.
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  ; Both loads come before both stores.
  %5 = load i8* %2, align 1
  %6 = load i8* %3, align 1
  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved. Can't merge them.
; Same data movement as merge_loads_i16, except that the second load is issued
; after the first store; four separate byte moves are expected.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %q     - source element (noalias); only fields 0 and 1 are read.
;   %p     - first destination element (noalias).
; CHECK: no_merge_loads
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; The source addresses do not change between iterations.
  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                               ; preds = %a4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  ; Order matters here: load, store, load, store.
  %a5 = load i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  %a6 = load i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Two adjacent i32 loads from %q feed two adjacent i32 stores into %p; the
; expectation is one 64-bit load and one 64-bit store. The loads and stores
; carry no explicit alignment.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %q     - source element (noalias); only fields 0 and 1 are read.
;   %p     - first destination element (noalias).
; CHECK: merge_loads_integer
; load:
; CHECK: movq
; store:
; CHECK: movq
; CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; The source addresses do not change between iterations.
  %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  ; Both loads come before both stores.
  %5 = load i32* %2
  %6 = load i32* %3
  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Four adjacent i32 loads from %q feed four adjacent i32 stores into %p
; (16 bytes each way); the expectation is one unaligned vector load and one
; unaligned vector store. The loads and stores carry no explicit alignment.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %q     - source element (noalias); only fields 0..3 are read.
;   %p     - first destination element (noalias).
; CHECK: merge_loads_vector
; load:
; CHECK: movups
; store:
; CHECK: movups
; CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; The source addresses do not change between iterations.
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  ; All four loads come before all four stores.
  %b1 = load i32* %a2
  %b2 = load i32* %a3
  %b3 = load i32* %a4
  %b4 = load i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same data movement as merge_loads_vector, but every load and store is marked
; "align 1". The expectation is four separate 32-bit loads followed by four
; separate 32-bit stores, i.e. the accesses are not combined.
;   %count - iteration count; the loop is skipped entirely unless %count > 0.
;   %q     - source element (noalias); only fields 0..3 are read.
;   %p     - first destination element (noalias).
; CHECK: merge_loads_no_align
; load:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
; store:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; The source addresses do not change between iterations.
  %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  ; All four loads come before all four stores; each is only byte-aligned.
  %b1 = load i32* %a2, align 1
  %b2 = load i32* %a3, align 1
  %b3 = load i32* %a4, align 1
  %b4 = load i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}
