; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s

; If spilling to smem, additional registers are used for the resource
; descriptor.

; ALL-LABEL: {{^}}max_9_sgprs:

; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 9
; Kernel restricted by attribute group #0 ("amdgpu-num-sgpr"="14"); the
; ALL CHECK lines above pin the SGPR usage the backend reports for it.
; Five global-pointer outputs plus five i32 inputs are stored straight
; through, so SGPR pressure comes only from the kernel inputs.
define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1,
                                       i32 addrspace(1)* %out2,
                                       i32 addrspace(1)* %out3,
                                       i32 addrspace(1)* %out4,
                                       i32 addrspace(1)* %out5,
                                       i32 %one, i32 %two, i32 %three, i32 %four, i32 %five) #0 {
  store i32 %one, i32 addrspace(1)* %out1
  store i32 %two, i32 addrspace(1)* %out2
  store i32 %three, i32 addrspace(1)* %out3
  store i32 %four, i32 addrspace(1)* %out4
  store i32 %five, i32 addrspace(1)* %out5
  ret void
}

; private resource: 4
; scratch wave offset: 1
; workgroup ids: 3
; dispatch id: 2
; queue ptr: 2
; flat scratch init: 2
; ---------------------
; total: 14

; + reserved vcc = 16

; Because we can't handle re-using the last few input registers as the
; special vcc etc. registers (as well as decide to not use the unused
; features when the number of registers is frozen), this ends up using
; more than expected.

; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
; XTOSGPR: SGPRBlocks: 1
; XTOSGPR: NumSGPRsForWavesPerEU: 16

; XTOSMEM: s_mov_b64 s[10:11], s[2:3]
; XTOSMEM: s_mov_b64 s[8:9], s[0:1]
; XTOSMEM: s_mov_b32 s7, s13

; XTOSMEM: SGPRBlocks: 1
; XTOSMEM: NumSGPRsForWavesPerEU: 16
;
; This test case is disabled: When calculating the spillslot addresses AMDGPU
; creates an extra vreg to save/restore m0 which in a point of maximum register
; pressure would trigger an endless loop; the compiler aborts earlier with
; "Incomplete scavenging after 2nd pass" in practice.
;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
;                                                       i32 addrspace(1)* %out2,
;                                                       i32 addrspace(1)* %out3,
;                                                       i32 addrspace(1)* %out4,
;                                                       i32 %one, i32 %two, i32 %three, i32 %four) #2 {
;  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
;  %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
;  store volatile i32 0, i32* undef
;  br label %stores
;
;stores:
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  store volatile i64 %x.3, i64 addrspace(1)* undef
;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
;  store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef
;
;  store i32 %one, i32 addrspace(1)* %out1
;  store i32 %two, i32 addrspace(1)* %out2
;  store i32 %three, i32 addrspace(1)* %out3
;  store i32 %four, i32 addrspace(1)* %out4
;  ret void
;}

; The following test is commented out for now; http://llvm.org/PR31230
; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
; ; Make sure copies for input buffer are not clobbered. This requires
; ; swapping the order the registers are copied from what normally
; ; happens.

; XTOSMEM: s_mov_b32 s5, s11
; XTOSMEM: s_add_u32 m0, s5,
; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0

; XALL: SGPRBlocks: 2
; XALL: NumSGPRsForWavesPerEU: 18
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
;                                                       i32 addrspace(1)* %out2,
;                                                       i32 addrspace(1)* %out3,
;                                                       i32 addrspace(1)* %out4,
;                                                       i32 %one, i32 %two, i32 %three, i32 %four) #2 {
;  store volatile i32 0, i32* undef
;  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
;  store volatile i32 %x.0, i32 addrspace(1)* undef
;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
;  store volatile i64 %x.3, i64 addrspace(1)* undef
;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
;
;  store i32 %one, i32 addrspace(1)* %out1
;  store i32 %two, i32 addrspace(1)* %out2
;  store i32 %three, i32 addrspace(1)* %out3
;  store i32 %four, i32 addrspace(1)* %out4
;  ret void
;}

; AMDGPU intrinsics referenced by the (currently disabled) tests above;
; all are readnone (attribute group #1).
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
declare i32 @llvm.amdgcn.workgroup.id.z() #1
declare i64 @llvm.amdgcn.dispatch.id() #1
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #1
128
; SGPR budgets under test: #0 caps the live kernel at 14 SGPRs; #2/#3
; carry the tighter caps used by the disabled test cases above.
attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }
attributes #3 = { nounwind "amdgpu-num-sgpr"="11" }