; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
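; The annotate-kernel-features pass infers which implicit inputs a kernel
; needs from the intrinsics and addrspacecasts it uses, and records them as
; function attributes (checked against the attribute groups at the bottom).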

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0

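; workgroup.id.x and workitem.id.x are always enabled, so using them adds
; no new attribute; those kernels keep the plain nounwind group #1.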
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

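; The dispatch and queue pointer intrinsics each require their own implicit
; SGPR input, recorded as amdgpu-dispatch-ptr (#10) and amdgpu-queue-ptr (#11).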
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %queue.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(2)* %queue.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

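; Casting a group or private pointer to flat needs the aperture bases, which
; are read from the queue ptr, so these kernels also get amdgpu-queue-ptr.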
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; No-op addrspacecasts (global or constant to flat and back) should not
; require the queue ptr.
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  %ld = load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

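; The pass emits one attribute group per distinct combination of inferred
; features; the #N on each check line above must resolve to a group here.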
; HSA: attributes #0 = { nounwind readnone }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }