; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64

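; The patterns below correspond to portable byte-wise reads, e.g. (illustrative
; C, not part of the checked test; needs <stdint.h>):
;
;   uint32_t read_le32(const uint8_t *p) {
;     return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
;            ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
;   }
;
; On a little-endian target such a read should fold into a single wide load,
; and its big-endian counterpart into a wide load plus a byte swap.
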
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

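; Worked example for the pattern above, assuming memory bytes
; p[0..3] = {0x11, 0x22, 0x33, 0x44}: the expression evaluates to
; 0x11 | 0x2200 | 0x330000 | 0x44000000 = 0x44332211, which is exactly what a
; little-endian i32 load of those bytes produces, hence the single movl.
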
; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

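; Worked example with the same assumed bytes {0x11, 0x22, 0x33, 0x44}: this
; big-endian read evaluates to 0x11223344, the byte swap of the little-endian
; i32 load value 0x44332211, hence the movl + bswapl pair.
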
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}

; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %edx
; CHECK-NEXT: movl 4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_bswap_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %ecx
; CHECK-NEXT: movzwl 2(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzwl (%rdi), %ecx
; CHECK64-NEXT: movzwl 2(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
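; Note: the sign bits of %tmp5 are shifted out by the shl by 16, so the result
; equals the zext form of the same load; the fold is nevertheless not expected
; here because the combiner only matches zero/any-extending loads (an
; assumption about this revision, not a checked property).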

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi6:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi7:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 14(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 15(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 16(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extending loads as a part of the load
; combine pattern. In order to fold the pattern above we need to reassociate
; the address computation first. By the time the address computation is
; reassociated, the loads have been combined into zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi9:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi10:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi11:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl 12(%eax,%ecx), %edx
; CHECK-NEXT: movzbl 13(%eax,%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 14(%eax,%ecx), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movsbl 15(%eax,%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movzbl 12(%rdi,%rax), %ecx
; CHECK64-NEXT: movzbl 13(%rdi,%rax), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 14(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movsbl 15(%rdi,%rax), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}