; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | (((i32) ((i16) p[3] | ((i16) p[4] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[3] << 8) | (i16) p[4])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %edx
; CHECK-NEXT: movl 4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: movzbl 2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl -4(%eax), %ecx
; CHECK-NEXT: movzbl -3(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl -2(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl -1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl -4(%rdi), %eax
; CHECK64-NEXT: movzbl -3(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl -2(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl -1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 4(%eax), %ecx
; CHECK-NEXT: movzbl 3(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: movzbl 3(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl -1(%eax), %ecx
; CHECK-NEXT: movzbl -2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl -3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl -4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl -1(%rdi), %eax
; CHECK64-NEXT: movzbl -2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl -3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl -4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
; TODO: Currently we don't fold the pattern for x86-64 target because we don't
; see that the loads are adjacent. It happens because BaseIndexOffset doesn't
; look through zexts.

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}
declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | (i32) bswap(p[0] << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_bswap_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq


  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}