; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s

; If spilling to smem, additional registers are used for the resource
; descriptor.
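; (The RUN line above passes -amdgpu-spill-sgpr-to-smem=0, so the checks
; below only cover the non-SMEM spilling path.)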

; FIXME: Vectorization can increase required SGPR count beyond limit.
; FIXME: SGPR-to-SMEM always requires an additional SGPR to scavenge m0.

; ALL-LABEL: {{^}}max_9_sgprs:

; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 9
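; The asm sideeffect call below uses nine "s" constraints, so nine of the
; loaded values must be live in SGPRs at the same time.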
define amdgpu_kernel void @max_9_sgprs() #0 {
  %one = load volatile i32, i32 addrspace(4)* undef
  %two = load volatile i32, i32 addrspace(4)* undef
  %three = load volatile i32, i32 addrspace(4)* undef
  %four = load volatile i32, i32 addrspace(4)* undef
  %five = load volatile i32, i32 addrspace(4)* undef
  %six = load volatile i32, i32 addrspace(4)* undef
  %seven = load volatile i32, i32 addrspace(4)* undef
  %eight = load volatile i32, i32 addrspace(4)* undef
  %nine = load volatile i32, i32 addrspace(4)* undef
  %ten = load volatile i32, i32 addrspace(4)* undef
  call void asm sideeffect "", "s,s,s,s,s,s,s,s,s"(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, i32 %nine)
  store volatile i32 %one, i32 addrspace(1)* undef
  store volatile i32 %two, i32 addrspace(1)* undef
  store volatile i32 %three, i32 addrspace(1)* undef
  store volatile i32 %four, i32 addrspace(1)* undef
  store volatile i32 %five, i32 addrspace(1)* undef
  store volatile i32 %six, i32 addrspace(1)* undef
  store volatile i32 %seven, i32 addrspace(1)* undef
  store volatile i32 %eight, i32 addrspace(1)* undef
  store volatile i32 %nine, i32 addrspace(1)* undef
  store volatile i32 %ten, i32 addrspace(1)* undef
  ret void
}

; private resource: 4
; scratch wave offset: 1
; workgroup ids: 3
; dispatch id: 2
; queue ptr: 2
; flat scratch init: 2
; ---------------------
; total: 14

; + reserved vcc = 16
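; (4 + 1 + 3 + 2 + 2 + 2 = 14; vcc is the two-SGPR pair vcc_lo:vcc_hi, so
; reserving it brings the total to 16.)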

; Because we can't reuse the last few input registers as the special
; registers (vcc etc.), nor decide to drop unused features once the
; number of registers is frozen, this ends up using more SGPRs than
; expected.

; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
; XTOSGPR: SGPRBlocks: 1
; XTOSGPR: NumSGPRsForWavesPerEU: 16

; XTOSMEM: s_mov_b64 s[10:11], s[2:3]
; XTOSMEM: s_mov_b64 s[8:9], s[0:1]
; XTOSMEM: s_mov_b32 s7, s13

; XTOSMEM: SGPRBlocks: 1
; XTOSMEM: NumSGPRsForWavesPerEU: 16
;
; This test case is disabled: when calculating the spill slot addresses,
; AMDGPU creates an extra vreg to save/restore m0, which at a point of
; maximum register pressure would trigger an endless loop; in practice the
; compiler aborts earlier with "Incomplete scavenging after 2nd pass".
;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
;                                                       i32 addrspace(1)* %out2,
;                                                       i32 addrspace(1)* %out3,
;                                                       i32 addrspace(1)* %out4,
;                                                       i32 %one, i32 %two, i32 %three, i32 %four) #2 {
; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
; %x.3 = call i64 @llvm.amdgcn.dispatch.id()
; %x.4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; %x.5 = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; store volatile i32 0, i32* undef
; br label %stores
;
;stores:
; store volatile i32 %x.0, i32 addrspace(1)* undef
; store volatile i32 %x.0, i32 addrspace(1)* undef
; store volatile i32 %x.0, i32 addrspace(1)* undef
; store volatile i64 %x.3, i64 addrspace(1)* undef
; store volatile i8 addrspace(4)* %x.4, i8 addrspace(4)* addrspace(1)* undef
; store volatile i8 addrspace(4)* %x.5, i8 addrspace(4)* addrspace(1)* undef
;
; store i32 %one, i32 addrspace(1)* %out1
; store i32 %two, i32 addrspace(1)* %out2
; store i32 %three, i32 addrspace(1)* %out3
; store i32 %four, i32 addrspace(1)* %out4
; ret void
;}

; The following test is commented out for now; http://llvm.org/PR31230
; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
; ; Make sure copies for the input buffer are not clobbered. This requires
; ; swapping the order in which the registers are copied, relative to what
; ; normally happens.

; XTOSMEM: s_mov_b32 s5, s11
; XTOSMEM: s_add_u32 m0, s5,
; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0

; XALL: SGPRBlocks: 2
; XALL: NumSGPRsForWavesPerEU: 18
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
;                                                       i32 addrspace(1)* %out2,
;                                                       i32 addrspace(1)* %out3,
;                                                       i32 addrspace(1)* %out4,
;                                                       i32 %one, i32 %two, i32 %three, i32 %four) #2 {
; store volatile i32 0, i32* undef
; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
; store volatile i32 %x.0, i32 addrspace(1)* undef
; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
; store volatile i32 %x.0, i32 addrspace(1)* undef
; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
; store volatile i32 %x.0, i32 addrspace(1)* undef
; %x.3 = call i64 @llvm.amdgcn.dispatch.id()
; store volatile i64 %x.3, i64 addrspace(1)* undef
; %x.4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; store volatile i8 addrspace(4)* %x.4, i8 addrspace(4)* addrspace(1)* undef
;
; store i32 %one, i32 addrspace(1)* %out1
; store i32 %two, i32 addrspace(1)* %out2
; store i32 %three, i32 addrspace(1)* %out3
; store i32 %four, i32 addrspace(1)* %out4
; ret void
;}

declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
declare i32 @llvm.amdgcn.workgroup.id.z() #1
declare i64 @llvm.amdgcn.dispatch.id() #1
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #1

attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }
attributes #3 = { nounwind "amdgpu-num-sgpr"="11" }
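; Attribute group #0 sets the 14-SGPR budget for @max_9_sgprs via
; "amdgpu-num-sgpr"; #2 is referenced only by the commented-out functions
; above, and #3 is currently unused.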