; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
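; The bytes are assembled in little-endian order, so the whole pattern combines into a single i32 load.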
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
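; The bytes are assembled in big-endian order, so the combined i32 load needs a byte swap.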
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
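; Two adjacent i16 loads combine into a single i32 load.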
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
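; Loads of mixed widths (one i16 and two i8) still cover all four bytes and combine into one i32 load.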
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
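; The two i16 halves are themselves assembled from bytes; the whole tree still combines into one i32 load.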
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
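; The same two-level tree with the bytes in big-endian order combines into an i32 load plus bswap.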
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
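; The combined i64 load is emitted as a single movq on the 64-bit target and as two i32 loads on the 32-bit target.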
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
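; The 32-bit target needs two i32 loads and two bswaps; the 64-bit target folds everything into movq plus bswapq.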
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %edx
; CHECK-NEXT: movl 4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
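; The extra use of x keeps the pattern from being combined; every byte is loaded separately.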
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use an individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
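; Volatile loads must not be merged, so no combined load is formed.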
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: movzbl 2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
628
Artur Pilipenkobdf3c5a2017-02-06 14:15:31 +0000629; i8* p;
630; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
631define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
632; CHECK-LABEL: load_i32_by_i8_neg_offset:
633; CHECK: # BB#0:
634; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
635; CHECK-NEXT: movzbl -4(%eax), %ecx
636; CHECK-NEXT: movzbl -3(%eax), %edx
637; CHECK-NEXT: shll $8, %edx
638; CHECK-NEXT: orl %ecx, %edx
639; CHECK-NEXT: movzbl -2(%eax), %ecx
640; CHECK-NEXT: shll $16, %ecx
641; CHECK-NEXT: orl %edx, %ecx
642; CHECK-NEXT: movzbl -1(%eax), %eax
643; CHECK-NEXT: shll $24, %eax
644; CHECK-NEXT: orl %ecx, %eax
645; CHECK-NEXT: retl
646;
647; CHECK64-LABEL: load_i32_by_i8_neg_offset:
648; CHECK64: # BB#0:
649; CHECK64-NEXT: movzbl -4(%rdi), %eax
650; CHECK64-NEXT: movzbl -3(%rdi), %ecx
651; CHECK64-NEXT: shll $8, %ecx
652; CHECK64-NEXT: orl %eax, %ecx
653; CHECK64-NEXT: movzbl -2(%rdi), %edx
654; CHECK64-NEXT: shll $16, %edx
655; CHECK64-NEXT: orl %ecx, %edx
656; CHECK64-NEXT: movzbl -1(%rdi), %eax
657; CHECK64-NEXT: shll $24, %eax
658; CHECK64-NEXT: orl %edx, %eax
659; CHECK64-NEXT: retq
660
661 %tmp = bitcast i32* %arg to i8*
662 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
663 %tmp2 = load i8, i8* %tmp1, align 1
664 %tmp3 = zext i8 %tmp2 to i32
665 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
666 %tmp5 = load i8, i8* %tmp4, align 1
667 %tmp6 = zext i8 %tmp5 to i32
668 %tmp7 = shl nuw nsw i32 %tmp6, 8
669 %tmp8 = or i32 %tmp7, %tmp3
670 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
671 %tmp10 = load i8, i8* %tmp9, align 1
672 %tmp11 = zext i8 %tmp10 to i32
673 %tmp12 = shl nuw nsw i32 %tmp11, 16
674 %tmp13 = or i32 %tmp8, %tmp12
675 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
676 %tmp15 = load i8, i8* %tmp14, align 1
677 %tmp16 = zext i8 %tmp15 to i32
678 %tmp17 = shl nuw nsw i32 %tmp16, 24
679 %tmp18 = or i32 %tmp13, %tmp17
680 ret i32 %tmp18
681}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 4(%eax), %ecx
; CHECK-NEXT: movzbl 3(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: movzbl 3(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl -1(%eax), %ecx
; CHECK-NEXT: movzbl -2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl -3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl -4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl -1(%rdi), %eax
; CHECK64-NEXT: movzbl -2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl -3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl -4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
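; The 32-bit target folds this into a single load plus bswap; the 64-bit target currently still emits individual byte loads.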
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
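; A shl amount equal to the bit width is undefined, so the combine has to bail out rather than treat it as a byte offset.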
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
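; Byte-swapped i16 halves reassembled in the opposite order still fold into a single i32 load plus bswapl.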
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_bswap_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq


  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
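; The sign extension of the high half keeps this from being recognized; both halves are loaded separately.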
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %ecx
; CHECK-NEXT: movzwl 2(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzwl (%rdi), %ecx
; CHECK64-NEXT: movzwl 2(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
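; Only the 64-bit target reassociates the address computation here and folds the pattern into a single movl.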
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
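; Neither target folds this pattern yet; the extra +1 in every index is left unreassociated.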
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi6:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi7:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 14(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 15(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 16(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 15(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 16(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extend loads as part of the load combine pattern.
; In order to fold the pattern above we need to reassociate the address computation
; first. By the time the address computation is reassociated, the loads have already
; been combined into zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi9:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads, but the last load is combined into
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
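; Neither target combines the pattern; the final byte is loaded with a sign-extending movsbl.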
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi10:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi11:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movsbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movsbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}