; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | (((i32) ((i16) p[3] | ((i16) p[4] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[3] << 8) | (i16) p[4])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %edx
; CHECK-NEXT: movl 4(%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq

  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: movzbl 2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl -4(%eax), %ecx
; CHECK-NEXT: movzbl -3(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl -2(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl -1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl -4(%rdi), %eax
; CHECK64-NEXT: movzbl -3(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl -2(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl -1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 4(%eax), %ecx
; CHECK-NEXT: movzbl 3(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: movzbl 3(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl -1(%eax), %ecx
; CHECK-NEXT: movzbl -2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl -3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl -4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl -1(%rdi), %eax
; CHECK64-NEXT: movzbl -2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl -3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl -4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
; TODO: Currently we don't fold the pattern for x86-64 target because we don't
; see that the loads are adjacent. It happens because BaseIndexOffset doesn't
; look through zexts.

  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # BB#0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}
declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | (i32) bswap(p[0] << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_bswap_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq


  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}