; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
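
; These tests exercise the DAG combiner's load combining: chains of narrow
; loads that are zero extended, shifted and OR'ed together are folded into a
; single wide load, plus a byte swap when the bytes are assembled in reversed
; (big-endian) order. As a rough illustration only (this C source is an
; assumption, not part of the test), the first test below corresponds to:
;
;   uint32_t read_le32(const uint8_t *p) {
;     return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
;            ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
;   }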

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
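
; The same fold with the bytes assembled in big-endian order; sketched in C
; (an assumed illustration):
;
;   uint32_t read_be32(const uint8_t *p) {
;     return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
;            ((uint32_t)p[2] << 8) | (uint32_t)p[3];
;   }
;
; Without MOVBE the combiner emits a plain load followed by BSWAPL; with
; -mattr=+movbe the load and the swap fuse into a single MOVBEL.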

; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}
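
; The two-level variant above first assembles two i16 halves from bytes and
; then combines the halves; roughly, as C (an assumed illustration):
;
;   uint16_t lo = (uint16_t)(p[0] | (p[1] << 8));
;   uint16_t hi = (uint16_t)(p[2] | (p[3] << 8));
;   return (uint32_t)lo | ((uint32_t)hi << 16);
;
; The combiner still recognizes the whole tree as a single 32-bit load.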

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %edx
; BSWAP-NEXT: movl 4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: bswapl %edx
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel 4(%ecx), %eax
; MOVBE-NEXT: movbel (%ecx), %edx
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movq (%rdi), %rax
; BSWAP64-NEXT: bswapq %rax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbeq (%rdi), %rax
; MOVBE64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}
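
; Note how the i64 cases degrade gracefully on i686: the combined 64-bit load
; is legalized into two 32-bit halves, so the big-endian variant becomes two
; BSWAPL/MOVBEL operations (with the halves exchanged) instead of the single
; BSWAPQ/MOVBEQ emitted on x86_64.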
353
354; Part of the load by bytes pattern is used outside of the pattern
355; i8* p;
356; i32 x = (i32) p[1]
357; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
358; x | res
359define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
360; CHECK-LABEL: load_i32_by_i8_bswap_uses:
361; CHECK: # BB#0:
362; CHECK-NEXT: pushl %esi
363; CHECK-NEXT: .Lcfi0:
364; CHECK-NEXT: .cfi_def_cfa_offset 8
365; CHECK-NEXT: .Lcfi1:
366; CHECK-NEXT: .cfi_offset %esi, -8
367; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
368; CHECK-NEXT: movzbl (%eax), %ecx
369; CHECK-NEXT: shll $24, %ecx
370; CHECK-NEXT: movzbl 1(%eax), %edx
371; CHECK-NEXT: movl %edx, %esi
372; CHECK-NEXT: shll $16, %esi
373; CHECK-NEXT: orl %ecx, %esi
374; CHECK-NEXT: movzbl 2(%eax), %ecx
375; CHECK-NEXT: shll $8, %ecx
376; CHECK-NEXT: orl %esi, %ecx
377; CHECK-NEXT: movzbl 3(%eax), %eax
378; CHECK-NEXT: orl %ecx, %eax
379; CHECK-NEXT: orl %edx, %eax
380; CHECK-NEXT: popl %esi
381; CHECK-NEXT: retl
382;
383; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
384; CHECK64: # BB#0:
385; CHECK64-NEXT: movzbl (%rdi), %eax
386; CHECK64-NEXT: shll $24, %eax
387; CHECK64-NEXT: movzbl 1(%rdi), %ecx
388; CHECK64-NEXT: movl %ecx, %edx
389; CHECK64-NEXT: shll $16, %edx
390; CHECK64-NEXT: orl %eax, %edx
391; CHECK64-NEXT: movzbl 2(%rdi), %esi
392; CHECK64-NEXT: shll $8, %esi
393; CHECK64-NEXT: orl %edx, %esi
394; CHECK64-NEXT: movzbl 3(%rdi), %eax
395; CHECK64-NEXT: orl %esi, %eax
396; CHECK64-NEXT: orl %ecx, %eax
397; CHECK64-NEXT: retq
Artur Pilipenko41c00052017-01-25 08:53:31 +0000398 %tmp = bitcast i32* %arg to i8*
399 %tmp1 = load i8, i8* %tmp, align 1
400 %tmp2 = zext i8 %tmp1 to i32
401 %tmp3 = shl nuw nsw i32 %tmp2, 24
402 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
403 %tmp5 = load i8, i8* %tmp4, align 1
404 %tmp6 = zext i8 %tmp5 to i32
405 %tmp7 = shl nuw nsw i32 %tmp6, 16
406 %tmp8 = or i32 %tmp7, %tmp3
407 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
408 %tmp10 = load i8, i8* %tmp9, align 1
409 %tmp11 = zext i8 %tmp10 to i32
410 %tmp12 = shl nuw nsw i32 %tmp11, 8
411 %tmp13 = or i32 %tmp8, %tmp12
412 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
413 %tmp15 = load i8, i8* %tmp14, align 1
414 %tmp16 = zext i8 %tmp15 to i32
415 %tmp17 = or i32 %tmp13, %tmp16
416 ; Use individual part of the pattern outside of the pattern
417 %tmp18 = or i32 %tmp6, %tmp17
418 ret i32 %tmp18
419}
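
; Here %tmp6 (the zero-extended second byte) has a use outside the combined
; tree, so replacing the byte loads with one wide load would not remove them
; all; the fold is not applied and every byte is loaded individually.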

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
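
; A volatile access must remain a separate, untouched memory operation, so a
; single volatile byte load disables the whole fold.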

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}
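
; The store to %arg1 may alias the byte loads, so folding them into a single
; load that executes entirely before or after the store could change the
; loaded value; the combiner must keep the loads separate.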

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}
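
; Byte 1 comes from a different base pointer, so the four loads do not form a
; single contiguous word and no wide load is emitted.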

; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
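
; In the offset variants above and below, the constant byte offset of the
; combined load folds straight into the x86 addressing mode (e.g.
; "movl 1(%eax)" and "movl -4(%eax)"), so no extra address arithmetic is
; emitted.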

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl 1(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel 1(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl 1(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel 1(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl -4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel -4(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl -4(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel -4(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
763
Artur Pilipenko41c00052017-01-25 08:53:31 +0000764; i8* p; i32 i;
765; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
766define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
Simon Pilgrim8670993d2017-02-17 23:00:21 +0000767; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
768; BSWAP: # BB#0:
769; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
770; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
771; BSWAP-NEXT: movl (%ecx,%eax), %eax
772; BSWAP-NEXT: bswapl %eax
773; BSWAP-NEXT: retl
Artur Pilipenko41c00052017-01-25 08:53:31 +0000774;
Simon Pilgrim8670993d2017-02-17 23:00:21 +0000775; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
776; MOVBE: # BB#0:
777; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
778; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
779; MOVBE-NEXT: movbel (%ecx,%eax), %eax
780; MOVBE-NEXT: retl
781;
782; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
783; BSWAP64: # BB#0:
784; BSWAP64-NEXT: movslq %esi, %rax
785; BSWAP64-NEXT: movl (%rdi,%rax), %eax
786; BSWAP64-NEXT: bswapl %eax
787; BSWAP64-NEXT: retq
788;
789; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
790; MOVBE64: # BB#0:
791; MOVBE64-NEXT: movslq %esi, %rax
792; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
793; MOVBE64-NEXT: retq
Artur Pilipenko41c00052017-01-25 08:53:31 +0000794 %tmp = bitcast i32* %arg to i8*
795 %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
796 %tmp3 = load i8, i8* %tmp2, align 1
797 %tmp4 = zext i8 %tmp3 to i32
798 %tmp5 = shl nuw nsw i32 %tmp4, 24
799 %tmp6 = add nuw nsw i32 %arg1, 1
800 %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
801 %tmp8 = load i8, i8* %tmp7, align 1
802 %tmp9 = zext i8 %tmp8 to i32
803 %tmp10 = shl nuw nsw i32 %tmp9, 16
804 %tmp11 = or i32 %tmp10, %tmp5
805 %tmp12 = add nuw nsw i32 %arg1, 2
806 %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
807 %tmp14 = load i8, i8* %tmp13, align 1
808 %tmp15 = zext i8 %tmp14 to i32
809 %tmp16 = shl nuw nsw i32 %tmp15, 8
810 %tmp17 = or i32 %tmp11, %tmp16
811 %tmp18 = add nuw nsw i32 %arg1, 3
812 %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
813 %tmp20 = load i8, i8* %tmp19, align 1
814 %tmp21 = zext i8 %tmp20 to i32
815 %tmp22 = or i32 %tmp17, %tmp21
816 ret i32 %tmp22
817}
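
; With a variable index the combined load uses base+index addressing; on
; x86_64 the i32 index is first sign extended to 64 bits with MOVSLQ before
; being used in the address.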

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}
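
; Shifting an i32 by 32 (or 56) bits yields a poison value, so the backend is
; free to pick any result (here the stored value folds to -1); the test only
; guards against a crash in the load-combine logic.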

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP: # BB#0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE: # BB#0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64: # BB#0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64: # BB#0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
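
; The sign extension of the high half does not block the fold: after the
; shift by 16 only the low 16 bits of the extended value remain visible in
; the result, so sext and zext produce the same i32 here.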

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 13(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extend loads as part of the load combine
; pattern. In order to fold the pattern above we need to reassociate the
; address computation first. By the time the address computation is
; reassociated the loads have already been combined into zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $8, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
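
; The two-byte patterns in this group cover only half of an i32, and at the
; time these checks were generated the combiner left them as individual byte
; loads rather than forming a single 16-bit load; the tests pin that behavior
; for both byte orders and for the shifted variants below.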

; i8* p;
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $8, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}