; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
Tom Stellard94593ee2013-06-03 17:40:18 +00004
; Legacy intrinsics that just read implicit parameters
Jan Vesely30479502014-07-31 22:11:03 +00006
; Stores the result of llvm.r600.read.ngroups.x to %out. The amdgcn runs are
; expected to fetch it with s_load_dword from s[0:1] at offset 0x0; the r600
; run is expected to read KC0[0].X.
; FUNC-LABEL: {{^}}ngroups_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
21
; Stores the result of llvm.r600.read.ngroups.y to %out. The amdgcn runs are
; expected to fetch it with s_load_dword from s[0:1] (immediate 0x1 on SI,
; 0x4 on VI); the r600 run is expected to read KC0[0].Y.
; FUNC-LABEL: {{^}}ngroups_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
36
; Stores the result of llvm.r600.read.ngroups.z to %out. The amdgcn runs are
; expected to fetch it with s_load_dword from s[0:1] (immediate 0x2 on SI,
; 0x8 on VI); the r600 run is expected to read KC0[0].Z.
; FUNC-LABEL: {{^}}ngroups_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
51
; Stores the result of llvm.r600.read.global.size.x to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x3 on SI,
; 0xc on VI); the r600 run is expected to read KC0[0].W.
; FUNC-LABEL: {{^}}global_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
66
; Stores the result of llvm.r600.read.global.size.y to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x4 on SI,
; 0x10 on VI); the r600 run is expected to read KC0[1].X.
; FUNC-LABEL: {{^}}global_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
81
; Stores the result of llvm.r600.read.global.size.z to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x5 on SI,
; 0x14 on VI); the r600 run is expected to read KC0[1].Y.
; FUNC-LABEL: {{^}}global_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
96
; Stores the result of llvm.r600.read.local.size.x to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x6 on SI,
; 0x18 on VI); the r600 run is expected to read KC0[1].Z.
; FUNC-LABEL: {{^}}local_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
111
; Stores the result of llvm.r600.read.local.size.y to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x7 on SI,
; 0x1c on VI); the r600 run is expected to read KC0[1].W.
; FUNC-LABEL: {{^}}local_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
126
; Stores the result of llvm.r600.read.local.size.z to %out. The amdgcn runs
; are expected to fetch it with s_load_dword from s[0:1] (immediate 0x8 on SI,
; 0x20 on VI); the r600 run is expected to read KC0[2].X.
; FUNC-LABEL: {{^}}local_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
141
; Legacy use of r600 intrinsics by GCN

; The tgid values are stored in sgprs offset by the number of user
; sgprs.
Tom Stellard94593ee2013-06-03 17:40:18 +0000146
; Stores the result of llvm.r600.read.tgid.x to %out. With two user SGPRs the
; checks expect the value to be copied out of s2, and only the X work-group
; ID enable bit to be set in COMPUTE_PGM_RSRC2.
; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
162
; Stores the result of llvm.r600.read.tgid.y to %out. With two user SGPRs the
; checks expect the value to be copied out of s3. The end-of-line anchor on
; the operand keeps the pattern from also accepting s30 through s39, matching
; the anchored checks used by the x and z variants of this test.
; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
174
; Stores the result of llvm.r600.read.tgid.z to %out. The checks expect the X
; and Z work-group ID enables to be set while Y stays clear, so with two user
; SGPRs the Z value is expected in s3.
; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
190
; Stores the result of llvm.r600.read.tidig.x to %out; the value is expected
; to be stored straight from v0.
; The config checks sit ahead of the function label because they must match
; before it in the output. 47180 is 0xB84C; 132 is 0x84, which (assuming the
; usual COMPUTE_PGM_RSRC2 layout - confirm against the register spec) is
; USER_SGPR = 2 plus TGID_X_EN with a zero TIDIG_COMP_CNT.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
203
; Stores the result of llvm.r600.read.tidig.y to %out; the value is expected
; to be stored straight from v1.
; The config checks sit ahead of the function label because they must match
; before it in the output. 47180 is 0xB84C; 2180 is 0x884, i.e. 132 + 2048,
; which (assuming the usual COMPUTE_PGM_RSRC2 layout - confirm against the
; register spec) corresponds to TIDIG_COMP_CNT = 1.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y_legacy:

; GCN-NOHSA: buffer_store_dword v1
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
217
; Stores the result of llvm.r600.read.tidig.z to %out; the value is expected
; to be stored straight from v2.
; The config checks sit ahead of the function label because they must match
; before it in the output. 47180 is 0xB84C; 4228 is 0x1084, i.e. 132 + 4096,
; which (assuming the usual COMPUTE_PGM_RSRC2 layout - confirm against the
; register spec) corresponds to TIDIG_COMP_CNT = 2.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
230
; Declarations of the legacy r600 read intrinsics exercised by the kernels in
; this file. Every one takes no arguments, returns i32, and carries attribute
; group #0.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; Attribute group shared by all intrinsic declarations and call sites.
attributes #0 = { readnone }