; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
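
; These tests exercise the DAG combiner's load combining: chains of narrow
; loads that are zero extended, shifted and OR'ed together are folded into a
; single wide load, plus a byte swap when the bytes are assembled in reversed
; (big-endian) order. As a rough illustration only (this C source is an
; assumption, not part of the test), the first test below corresponds to:
;
;   uint32_t read_le32(const uint8_t *p) {
;     return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
;            ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
;   }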

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
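
; The same fold with the bytes assembled in big-endian order; sketched in C
; (an assumed illustration):
;
;   uint32_t read_be32(const uint8_t *p) {
;     return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
;            ((uint32_t)p[2] << 8) | (uint32_t)p[3];
;   }
;
; Without MOVBE the combiner emits a plain load followed by BSWAPL; with
; -mattr=+movbe the load and the swap fuse into a single MOVBEL.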

; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}
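
; The two-level variant above first assembles two i16 halves from bytes and
; then combines the halves; roughly, as C (an assumed illustration):
;
;   uint16_t lo = (uint16_t)(p[0] | (p[1] << 8));
;   uint16_t hi = (uint16_t)(p[2] | (p[3] << 8));
;   return (uint32_t)lo | ((uint32_t)hi << 16);
;
; The combiner still recognizes the whole tree as a single 32-bit load.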

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %edx
; BSWAP-NEXT: movl 4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: bswapl %edx
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel 4(%ecx), %eax
; MOVBE-NEXT: movbel (%ecx), %edx
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movq (%rdi), %rax
; BSWAP64-NEXT: bswapq %rax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbeq (%rdi), %rax
; MOVBE64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}
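
; Note how the i64 cases degrade gracefully on i686: the combined 64-bit load
; is legalized into two 32-bit halves, so the big-endian variant becomes two
; BSWAPL/MOVBEL operations (with the halves exchanged) instead of the single
; BSWAPQ/MOVBEQ emitted on x86_64.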
353
354; Part of the load by bytes pattern is used outside of the pattern
355; i8* p;
356; i32 x = (i32) p[1]
357; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
358; x | res
359define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
360; CHECK-LABEL: load_i32_by_i8_bswap_uses:
361; CHECK: # BB#0:
362; CHECK-NEXT: pushl %esi
363; CHECK-NEXT: .Lcfi0:
364; CHECK-NEXT: .cfi_def_cfa_offset 8
365; CHECK-NEXT: .Lcfi1:
366; CHECK-NEXT: .cfi_offset %esi, -8
367; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
368; CHECK-NEXT: movzbl (%eax), %ecx
369; CHECK-NEXT: shll $24, %ecx
370; CHECK-NEXT: movzbl 1(%eax), %edx
371; CHECK-NEXT: movl %edx, %esi
372; CHECK-NEXT: shll $16, %esi
373; CHECK-NEXT: orl %ecx, %esi
374; CHECK-NEXT: movzbl 2(%eax), %ecx
375; CHECK-NEXT: shll $8, %ecx
376; CHECK-NEXT: orl %esi, %ecx
377; CHECK-NEXT: movzbl 3(%eax), %eax
378; CHECK-NEXT: orl %ecx, %eax
379; CHECK-NEXT: orl %edx, %eax
380; CHECK-NEXT: popl %esi
381; CHECK-NEXT: retl
382;
383; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
384; CHECK64: # BB#0:
385; CHECK64-NEXT: movzbl (%rdi), %eax
386; CHECK64-NEXT: shll $24, %eax
387; CHECK64-NEXT: movzbl 1(%rdi), %ecx
388; CHECK64-NEXT: movl %ecx, %edx
389; CHECK64-NEXT: shll $16, %edx
390; CHECK64-NEXT: orl %eax, %edx
391; CHECK64-NEXT: movzbl 2(%rdi), %esi
392; CHECK64-NEXT: shll $8, %esi
393; CHECK64-NEXT: orl %edx, %esi
394; CHECK64-NEXT: movzbl 3(%rdi), %eax
395; CHECK64-NEXT: orl %esi, %eax
396; CHECK64-NEXT: orl %ecx, %eax
397; CHECK64-NEXT: retq
Artur Pilipenko41c00052017-01-25 08:53:31 +0000398 %tmp = bitcast i32* %arg to i8*
399 %tmp1 = load i8, i8* %tmp, align 1
400 %tmp2 = zext i8 %tmp1 to i32
401 %tmp3 = shl nuw nsw i32 %tmp2, 24
402 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
403 %tmp5 = load i8, i8* %tmp4, align 1
404 %tmp6 = zext i8 %tmp5 to i32
405 %tmp7 = shl nuw nsw i32 %tmp6, 16
406 %tmp8 = or i32 %tmp7, %tmp3
407 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
408 %tmp10 = load i8, i8* %tmp9, align 1
409 %tmp11 = zext i8 %tmp10 to i32
410 %tmp12 = shl nuw nsw i32 %tmp11, 8
411 %tmp13 = or i32 %tmp8, %tmp12
412 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
413 %tmp15 = load i8, i8* %tmp14, align 1
414 %tmp16 = zext i8 %tmp15 to i32
415 %tmp17 = or i32 %tmp13, %tmp16
416 ; Use individual part of the pattern outside of the pattern
417 %tmp18 = or i32 %tmp6, %tmp17
418 ret i32 %tmp18
419}
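
; Here %tmp6 (the zero-extended second byte) has a use outside the combined
; tree, so replacing the byte loads with one wide load would not remove them
; all; the fold is not applied and every byte is loaded individually.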

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
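
; A volatile access must remain a separate, untouched memory operation, so a
; single volatile byte load disables the whole fold.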

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}
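
; The store to %arg1 may alias the byte loads, so folding them into a single
; load that executes entirely before or after the store could change the
; loaded value; the combiner must keep the loads separate.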

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}
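
; Byte 1 comes from a different base pointer, so the four loads do not form a
; single contiguous word and no wide load is emitted.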

; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
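
; In the offset variants above and below, the constant byte offset of the
; combined load folds straight into the x86 addressing mode (e.g.
; "movl 1(%eax)" and "movl -4(%eax)"), so no extra address arithmetic is
; emitted.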

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl 1(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel 1(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl 1(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel 1(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl -4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel -4(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl -4(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel -4(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
763
Artur Pilipenko41c00052017-01-25 08:53:31 +0000764; i8* p; i32 i;
765; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
766define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
Simon Pilgrim8670993d2017-02-17 23:00:21 +0000767; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
768; BSWAP: # BB#0:
769; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
770; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
771; BSWAP-NEXT: movl (%ecx,%eax), %eax
772; BSWAP-NEXT: bswapl %eax
773; BSWAP-NEXT: retl
Artur Pilipenko41c00052017-01-25 08:53:31 +0000774;
Simon Pilgrim8670993d2017-02-17 23:00:21 +0000775; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
776; MOVBE: # BB#0:
777; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
778; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
779; MOVBE-NEXT: movbel (%ecx,%eax), %eax
780; MOVBE-NEXT: retl
781;
782; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
783; BSWAP64: # BB#0:
784; BSWAP64-NEXT: movslq %esi, %rax
785; BSWAP64-NEXT: movl (%rdi,%rax), %eax
786; BSWAP64-NEXT: bswapl %eax
787; BSWAP64-NEXT: retq
788;
789; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
790; MOVBE64: # BB#0:
791; MOVBE64-NEXT: movslq %esi, %rax
792; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
793; MOVBE64-NEXT: retq
Artur Pilipenko41c00052017-01-25 08:53:31 +0000794 %tmp = bitcast i32* %arg to i8*
795 %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
796 %tmp3 = load i8, i8* %tmp2, align 1
797 %tmp4 = zext i8 %tmp3 to i32
798 %tmp5 = shl nuw nsw i32 %tmp4, 24
799 %tmp6 = add nuw nsw i32 %arg1, 1
800 %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
801 %tmp8 = load i8, i8* %tmp7, align 1
802 %tmp9 = zext i8 %tmp8 to i32
803 %tmp10 = shl nuw nsw i32 %tmp9, 16
804 %tmp11 = or i32 %tmp10, %tmp5
805 %tmp12 = add nuw nsw i32 %arg1, 2
806 %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
807 %tmp14 = load i8, i8* %tmp13, align 1
808 %tmp15 = zext i8 %tmp14 to i32
809 %tmp16 = shl nuw nsw i32 %tmp15, 8
810 %tmp17 = or i32 %tmp11, %tmp16
811 %tmp18 = add nuw nsw i32 %arg1, 3
812 %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
813 %tmp20 = load i8, i8* %tmp19, align 1
814 %tmp21 = zext i8 %tmp20 to i32
815 %tmp22 = or i32 %tmp17, %tmp21
816 ret i32 %tmp22
817}
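
; With a variable index the combined load uses base+index addressing; on
; x86_64 the i32 index is first sign extended to 64 bits with MOVSLQ before
; being used in the address.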

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}
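
; Shifting an i32 by 32 (or 56) bits yields a poison value, so the backend is
; free to pick any result (here the stored value folds to -1); the test only
; guards against a crash in the load-combine logic.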

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
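
; The sign extension of the high half does not block the fold: after the
; shift by 16 only the low 16 bits of the extended value remain visible in
; the result, so sext and zext produce the same i32 here.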

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 13(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extend loads as part of the load combine
; pattern. In order to fold the pattern above we need to reassociate the
; address computation first. By the time the address computation is
; reassociated the loads have already been combined into zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $8, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
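
; The two-byte patterns in this group cover only half of an i32, and at the
; time these checks were generated the combiner left them as individual byte
; loads rather than forming a single 16-bit load; the tests pin that behavior
; for both byte orders and for the shifted variants below.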

; i8* p;
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $8, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}