Blame - llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll - toolchain/llvm-project

blob: 25cbb7b105f04fd6e9c1666d0331f031a90db4f8 [file] [log] [blame]

Matt Arsenault	f28683c	2017-06-26 17:53:59 +0000	[diff] [blame]	1	; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	2
				3	; Test that non-entry function frame indices are expanded properly to
				4	; give an index relative to the scratch wave offset register
				5
				6	; Materialize into a mov. Make sure there isn't an unnecessary copy.
				7	; GCN-LABEL: {{^}}func_mov_fi_i32:
				8	; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Matthias Braun	7a482e2	2017-06-20 18:43:14 +0000	[diff] [blame]	9	; GCN: s_sub_u32 s6, s5, s4
				10	; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
				11	; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	12	; GCN-NOT: v_mov
				13	; GCN: ds_write_b32 v0, v0
				14	define void @func_mov_fi_i32() #0 { ; stores the address of a stack object, so the frame index itself is needed as a value
				15	%alloca = alloca i32 ; the only stack object in this function
				16	store volatile i32* %alloca, i32* addrspace(3)* undef ; volatile store of the pointer (not through it) to an undef addrspace(3) location
				17	ret void
				18	}
				19
				20	; Materialize into an add of a constant offset from the FI.
				21	; FIXME: Should be able to merge adds
				22
				23	; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
				24	; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Matthias Braun	7a482e2	2017-06-20 18:43:14 +0000	[diff] [blame]	25	; GCN: s_sub_u32 s6, s5, s4
				26	; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
				27	; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	28	; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
				29	; GCN-NOT: v_mov
				30	; GCN: ds_write_b32 v0, v0
				31	define void @func_add_constant_to_fi_i32() #0 { ; stores the address of element 1 of a stack array (frame index plus a constant)
				32	%alloca = alloca [2 x i32], align 4 ; two-element stack array
				33	%gep0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloca, i32 0, i32 1 ; address of element 1, i.e. 4 bytes past the start of %alloca
				34	store volatile i32* %gep0, i32* addrspace(3)* undef ; volatile store of the computed pointer to an undef addrspace(3) location
				35	ret void
				36	}
				37
				38	; A user that the materialized frame index can't be meaningfully
				39	; folded into.
				40
				41	; GCN-LABEL: {{^}}func_other_fi_user_i32:
Matthias Braun	7a482e2	2017-06-20 18:43:14 +0000	[diff] [blame]	42	; GCN: s_sub_u32 s6, s5, s4
				43	; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
				44	; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	45	; GCN-NEXT: v_mul_lo_i32 v0, v0, 9
				46	; GCN-NOT: v_mov
				47	; GCN: ds_write_b32 v0, v0
				48	define void @func_other_fi_user_i32() #0 { ; uses the stack address as an integer operand of a multiply
				49	%alloca = alloca [2 x i32], align 4 ; stack array whose address is consumed below
				50	%ptrtoint = ptrtoint [2 x i32]* %alloca to i32 ; reinterpret the stack pointer as a 32-bit integer
				51	%mul = mul i32 %ptrtoint, 9 ; arithmetic user of the address
				52	store volatile i32 %mul, i32 addrspace(3)* undef ; volatile store of the product to an undef addrspace(3) location
				53	ret void
				54	}
				55
				56	; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr:
				57	; GCN: v_mov_b32_e32 v1, 15{{$}}
				58	; GCN: buffer_store_dword v1, v0, s[0:3], s4 offen{{$}}
				59	define void @func_store_private_arg_i32_ptr(i32* %ptr) #0 { ; stores through a pointer received as an argument; no alloca in this function
				60	store volatile i32 15, i32* %ptr ; volatile store of the constant 15 through %ptr
				61	ret void
				62	}
				63
				64	; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr:
				65	; GCN: s_waitcnt
				66	; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s4 offen{{$}}
				67	define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 { ; loads through a pointer received as an argument; no alloca in this function
				68	%val = load volatile i32, i32* %ptr ; volatile load; %val has no further uses in this function
				69	ret void
				70	}
				71
				72	; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
				73	; GCN: s_waitcnt
Matt Arsenault	f28683c	2017-06-26 17:53:59 +0000	[diff] [blame]	74	; GCN-NEXT: s_mov_b32 s5, s32
				75	; GCN-NEXT: s_sub_u32 [[SUB:s[0-9]+]], s5, s4
				76	; GCN-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	77	; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
				78	; GCN-NOT: v_mov
				79	; GCN: ds_write_b32 v0, v0
				80	define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 { ; stores the address of field 1 of a byval struct argument
				81	%gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 ; address of the i8 field; not used below
				82	%gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 ; address of the i32 field
				83	%load1 = load i32, i32* %gep1 ; non-volatile load whose result is not used below
				84	store volatile i32* %gep1, i32* addrspace(3)* undef ; volatile store of the field address (not its value) to an undef addrspace(3) location
				85	ret void
				86	}
				87
				88	; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value:
				89	; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Matt Arsenault	f28683c	2017-06-26 17:53:59 +0000	[diff] [blame]	90	; GCN-NEXT: s_mov_b32 s5, s32
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	91	; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5
				92	; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
				93	define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 { ; loads both fields of a byval struct argument and stores their values
				94	%gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 ; address of the i8 field
				95	%gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 ; address of the i32 field
				96	%load0 = load i8, i8* %gep0 ; value of the i8 field
				97	%load1 = load i32, i32* %gep1 ; value of the i32 field
				98	store volatile i8 %load0, i8 addrspace(3)* undef ; volatile store of the loaded i8 value to an undef addrspace(3) location
				99	store volatile i32 %load1, i32 addrspace(3)* undef ; volatile store of the loaded i32 value to an undef addrspace(3) location
				100	ret void
				101	}
				102
				103	; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
Matthias Braun	7a482e2	2017-06-20 18:43:14 +0000	[diff] [blame]	104	; GCN: s_sub_u32 s6, s5, s4
				105	; GCN: v_lshr_b32_e64 v1, s6, 6
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	106	; GCN: s_and_saveexec_b64
				107
				108	; GCN: v_add_i32_e32 v0, vcc, 4, v1
				109	; GCN: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4
				110	; GCN: ds_write_b32
				111	define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 { ; accesses the byval struct only inside a conditionally executed block
				112	%cmp = icmp eq i32 %arg2, 0 ; condition selecting whether %bb runs
				113	br i1 %cmp, label %bb, label %ret ; %bb when %arg2 is 0, otherwise straight to %ret
				114
				115	bb: ; the only block that touches %arg0
				116	%gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 ; address of the i8 field; not used below
				117	%gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 ; address of the i32 field
				118	%load1 = load volatile i32, i32* %gep1 ; volatile load of the i32 field; the result is not used below
				119	store volatile i32* %gep1, i32* addrspace(3)* undef ; volatile store of the field address to an undef addrspace(3) location
				120	br label %ret
				121
				122	ret: ; common exit for both paths
				123	ret void
				124	}
				125
Matt Arsenault	c595185	2017-06-19 23:47:21 +0000	[diff] [blame]	126	; Added offset can't be used with VOP3 add
				127	; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
Matthias Braun	7a482e2	2017-06-20 18:43:14 +0000	[diff] [blame]	128	; GCN: s_sub_u32 s6, s5, s4
				129	; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
				130	; GCN-DAG: s_movk_i32 s6, 0x204
				131	; GCN: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]]
Matt Arsenault	c595185	2017-06-19 23:47:21 +0000	[diff] [blame]	132	; GCN: v_mul_lo_i32 v0, v0, 9
				133	; GCN: ds_write_b32 v0, v0
				134	define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { ; multiplies the address of a second stack object that follows a 512-byte array
				135	%alloca0 = alloca [128 x i32], align 4 ; 128 * 4 = 512 bytes; presumably what pushes %alloca1 to the 0x204 offset in the checks above - TODO confirm
				136	%alloca1 = alloca [8 x i32], align 4 ; the stack object whose address is used as an integer
				137	%gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65 ; address of element 65 of %alloca0
				138	%gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0 ; address of element 0 of %alloca1
				139	store volatile i32 7, i32* %gep0 ; volatile store into %alloca0, which gives the large array a use
				140	%ptrtoint = ptrtoint i32* %gep1 to i32 ; reinterpret the stack pointer as a 32-bit integer
				141	%mul = mul i32 %ptrtoint, 9 ; arithmetic user of the address
				142	store volatile i32 %mul, i32 addrspace(3)* undef ; volatile store of the product to an undef addrspace(3) location
				143	ret void
				144	}
				145	; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
				146	; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s5, s4
				147	; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6
				148	; GCN-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x204
				149	; GCN: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]]
				150	; GCN: v_mul_lo_i32 v0, v0, 9
				151	; GCN: ds_write_b32 v0, v0
				152	define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { ; same pattern as the non-vcc_live variant, with a VCC value defined and used by inline asm around the store
				153	%alloca0 = alloca [128 x i32], align 4 ; 128 * 4 = 512 bytes; presumably what pushes %alloca1 to the 0x204 offset in the checks above - TODO confirm
				154	%alloca1 = alloca [8 x i32], align 4 ; the stack object whose address is used as an integer
				155	%vcc = call i64 asm sideeffect "; def $0", "={VCC}"() ; inline asm whose 64-bit output is constrained to VCC
				156	%gep0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca0, i32 0, i32 65 ; address of element 65 of %alloca0
				157	%gep1 = getelementptr inbounds [8 x i32], [8 x i32]* %alloca1, i32 0, i32 0 ; address of element 0 of %alloca1
				158	store volatile i32 7, i32* %gep0 ; volatile store into %alloca0, placed between the VCC def and its use
				159	call void asm sideeffect "; use $0", "{VCC}"(i64 %vcc) ; inline asm consuming %vcc with its input constrained to VCC
				160	%ptrtoint = ptrtoint i32* %gep1 to i32 ; reinterpret the stack pointer as a 32-bit integer
				161	%mul = mul i32 %ptrtoint, 9 ; arithmetic user of the address
				162	store volatile i32 %mul, i32 addrspace(3)* undef ; volatile store of the product to an undef addrspace(3) location
				163	ret void
				164	}
				165
Matt Arsenault	2b1f9aa	2017-05-17 21:56:25 +0000	[diff] [blame]	166	attributes #0 = { nounwind }