; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; The fundamental problem: an add separated from other arithmetic by a sext can't
; be combined with the later instructions. However, if the first add is 'nsw',
; then we can promote the sext ahead of that add to allow optimizations.
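;
; Roughly, this is the kind of C source that produces the pattern below (an
; illustrative assumption, not part of the test): signed 'int' overflow is
; undefined behavior, so the front end can tag the 32-bit add with 'nsw'.
;
;   long add_nsw_consts(int i) {
;     return (long)(i + 5) + 7; /* i + 5 becomes 'add nsw i32', then sext */
;   }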

define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: addq $12, %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %ext, 7
  ret i64 %idx
}

; An x86 bonus: If we promote the sext ahead of the 'add nsw',
; we allow LEA formation and eliminate an add instruction.

define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %x, %ext
  ret i64 %idx
}

; Throw in a scale (left shift) because an LEA can do that too.
; Use a negative constant (LEA displacement) to verify that's handled correctly.

define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %shl = shl i64 %ext, 3
  %idx = add i64 %x, %shl
  ret i64 %idx
}

; Don't promote the sext when its result has no further arithmetic uses to
; combine with; the wider 64-bit add instruction needs an extra byte to encode.

define i64 @add_nsw_sext(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext:
; CHECK: # BB#0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  ret i64 %ext
}

; The typical use case: a 64-bit system where an 'int' is used as an index into an array.
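;
; A rough C analogue of the array-indexing pattern tested below (an illustrative
; assumption about the original source, not part of the test):
;
;   char *gep8(int i, char *x) {
;     return &x[i + 5]; /* 32-bit signed index added, then widened for the GEP */
;   }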

define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i8, i8* %x, i64 %ext
  ret i8* %idx
}

define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq -10(%rsi,%rax,2), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i16, i16* %x, i64 %ext
  ret i16* %idx
}

define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq 20(%rsi,%rax,4), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i32, i32* %x, i64 %ext
  ret i32* %idx
}

define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i64, i64* %x, i64 %ext
  ret i64* %idx
}

; LEA can't scale by 16, but the adds can still be combined into an LEA.

define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: shlq $4, %rax
; CHECK-NEXT: leaq 80(%rsi,%rax), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i128, i128* %x, i64 %ext
  ret i128* %idx
}

; A bigger win can be achieved when there is more than one use of the
; sign-extended value. In that case, we can eliminate the sign-extension
; instructions and use more efficient addressing modes for the memory ops.
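;
; PR20134 roughly corresponds to C source like the following (an illustrative
; assumption, not part of the test):
;
;   void PR20134(int *a, int i) {
;     a[i] = a[i + 1] + a[i + 2]; /* one movslq can feed all three accesses */
;   }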

define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK: # BB#0:
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
; CHECK-NEXT: retq

  %add1 = add nsw i32 %i, 1
  %idx1 = sext i32 %add1 to i64
  %gep1 = getelementptr i32, i32* %a, i64 %idx1
  %load1 = load i32, i32* %gep1, align 4

  %add2 = add nsw i32 %i, 2
  %idx2 = sext i32 %add2 to i64
  %gep2 = getelementptr i32, i32* %a, i64 %idx2
  %load2 = load i32, i32* %gep2, align 4

  %add3 = add i32 %load1, %load2
  %idx3 = sext i32 %i to i64
  %gep3 = getelementptr i32, i32* %a, i64 %idx3
  store i32 %add3, i32* %gep3, align 4
  ret void
}