; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64

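; The patterns below correspond to portable byte-wise reads, e.g. (illustrative
; C, not part of the checked test; needs <stdint.h>):
;
;   uint32_t read_le32(const uint8_t *p) {
;     return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
;            ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
;   }
;
; On a little-endian target such a read should fold into a single wide load,
; and its big-endian counterpart into a wide load plus a byte swap.
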
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

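; Worked example for the pattern above, assuming memory bytes
; p[0..3] = {0x11, 0x22, 0x33, 0x44}: the expression evaluates to
; 0x11 | 0x2200 | 0x330000 | 0x44000000 = 0x44332211, which is exactly what a
; little-endian i32 load of those bytes produces, hence the single movl.
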
; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

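; Worked example with the same assumed bytes {0x11, 0x22, 0x33, 0x44}: this
; big-endian read evaluates to 0x11223344, the byte swap of the little-endian
; i32 load value 0x44332211, hence the movl + bswapl pair.
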
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}

; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %edx
; CHECK-NEXT: movl 4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_bswap_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %ecx
; CHECK-NEXT: movzwl 2(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzwl (%rdi), %ecx
; CHECK64-NEXT: movzwl 2(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
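; Note: the sign bits of %tmp5 are shifted out by the shl by 16, so the result
; equals the zext form of the same load; the fold is nevertheless not expected
; here because the combiner only matches zero/any-extending loads (an
; assumption about this revision, not a checked property).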

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi6:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi7:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 14(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 15(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 16(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extending loads as a part of the load
; combine pattern. In order to fold the pattern above we need to reassociate
; the address computation first. By the time the address computation is
; reassociated, the loads have been combined into zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi9:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi10:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi11:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movsbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movsbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}