blob: 82c42601ef1e77368acb1462c70775d4433d88ec [file] [log] [blame]
Matt Arsenault7aad8fd2017-01-24 22:02:15 +00001; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
Matt Arsenault26f8f3d2015-11-30 21:16:03 +00003; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5
6; FUNC-LABEL: {{^}}local_size_x:
7; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
Matthias Braun97d0ffb2015-12-04 01:51:19 +00008; EG: MOV * [[VAL]], KC0[1].Z
Matt Arsenault26f8f3d2015-11-30 21:16:03 +00009
10; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
11; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
12; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1
13; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4
14
15; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
16; GCN: buffer_store_dword [[VVAL]]
; Kernel that calls @llvm.r600.read.local.size.x() and stores the returned
; i32 through the addrspace(1) pointer %out.
; NOTE(review): two directives before this kernel use CI-HSA / VI-HSA
; prefixes; no RUN line visible at the top of this file passes those
; prefixes to FileCheck, so they look inactive -- confirm.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000017define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000018entry:
19 %0 = call i32 @llvm.r600.read.local.size.x() #0
20 store i32 %0, i32 addrspace(1)* %out
21 ret void
22}
23
24; FUNC-LABEL: {{^}}local_size_y:
25; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
Matthias Braun97d0ffb2015-12-04 01:51:19 +000026; EG: MOV * [[VAL]], KC0[1].W
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000027
28; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
29; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
30; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
31; GCN: buffer_store_dword [[VVAL]]
; Kernel that calls @llvm.r600.read.local.size.y() and stores the returned
; i32 through the addrspace(1) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000032define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000033entry:
34 %0 = call i32 @llvm.r600.read.local.size.y() #0
35 store i32 %0, i32 addrspace(1)* %out
36 ret void
37}
38
39; FUNC-LABEL: {{^}}local_size_z:
40; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
Matthias Braun97d0ffb2015-12-04 01:51:19 +000041; EG: MOV * [[VAL]], KC0[2].X
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000042
43; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
44; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
45; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
46; GCN: buffer_store_dword [[VVAL]]
; Kernel that calls @llvm.r600.read.local.size.z() and stores the returned
; i32 through the addrspace(1) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000047define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000048entry:
49 %0 = call i32 @llvm.r600.read.local.size.z() #0
50 store i32 %0, i32 addrspace(1)* %out
51 ret void
52}
53
54; FUNC-LABEL: {{^}}local_size_xy:
55; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
56; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
57; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
58; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
59; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
60; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
61; GCN: buffer_store_dword [[VAL]]
; Kernel that reads the x and y local-size intrinsics, multiplies the two
; i32 results, and stores the product through %out.
; The directives before this kernel expect the multiply to be emitted as
; v_mul_u32_u24_e32.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000062define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000063entry:
64 %x = call i32 @llvm.r600.read.local.size.x() #0
65 %y = call i32 @llvm.r600.read.local.size.y() #0
66 %val = mul i32 %x, %y
67 store i32 %val, i32 addrspace(1)* %out
68 ret void
69}
70
71; FUNC-LABEL: {{^}}local_size_xz:
72
73; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
74; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
75; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
76; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
77; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff
78; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
79; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
80; GCN: buffer_store_dword [[VAL]]
; Kernel that reads the x and z local-size intrinsics, multiplies the two
; i32 results, and stores the product through %out.
; NOTE(review): one directive before this kernel uses an HSA-DAG prefix and
; refers to a FileCheck variable named XY, which is only captured by the
; CI-HSA / VI-HSA directives of an earlier kernel. No RUN line visible at
; the top of this file enables any of those prefixes, so the directive
; looks inactive -- confirm.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000081define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +000082entry:
83 %x = call i32 @llvm.r600.read.local.size.x() #0
84 %z = call i32 @llvm.r600.read.local.size.z() #0
85 %val = mul i32 %x, %z
86 store i32 %val, i32 addrspace(1)* %out
87 ret void
88}
89
90; FUNC-LABEL: {{^}}local_size_yz:
91; HSA: enable_sgpr_private_segment_buffer = 1
92; HSA: enable_sgpr_dispatch_ptr = 1
93
94; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
95; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
96; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
97; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
98; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
99; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
100; GCN: buffer_store_dword [[VAL]]
; Kernel that reads the y and z local-size intrinsics, multiplies the two
; i32 results, and stores the product through %out.
; NOTE(review): the enable_sgpr_* directives before this kernel use an HSA
; prefix that no RUN line visible at the top of this file enables, so they
; look inactive -- confirm.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000101define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000102entry:
103 %y = call i32 @llvm.r600.read.local.size.y() #0
104 %z = call i32 @llvm.r600.read.local.size.z() #0
105 %val = mul i32 %y, %z
106 store i32 %val, i32 addrspace(1)* %out
107 ret void
108}
109
110; FUNC-LABEL: {{^}}local_size_xyz:
111; HSA: enable_sgpr_private_segment_buffer = 1
112; HSA: enable_sgpr_dispatch_ptr = 1
113
114; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
115; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
116; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
117; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
118; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
119; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
120; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
121; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
122; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
123; GCN: buffer_store_dword [[VAL]]
; Kernel that reads all three local-size intrinsics and stores
; (x * y) + z through %out.
; The directives before this kernel expect the multiply and add to be
; emitted together as a single v_mad_u32_u24.
; NOTE(review): the enable_sgpr_* directives before this kernel use an HSA
; prefix that no RUN line visible at the top of this file enables, so they
; look inactive -- confirm.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000124define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000125entry:
126 %x = call i32 @llvm.r600.read.local.size.x() #0
127 %y = call i32 @llvm.r600.read.local.size.y() #0
128 %z = call i32 @llvm.r600.read.local.size.z() #0
129 %xy = mul i32 %x, %y
130 %xyz = add i32 %xy, %z
131 store i32 %xyz, i32 addrspace(1)* %out
132 ret void
133}
134
135; FUNC-LABEL: {{^}}local_size_x_known_bits:
136; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
137; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
138; GCN-NOT: 0xffff
139; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
140; GCN-NEXT: buffer_store_dword [[VVAL]]
; Kernel that reads the x local-size intrinsic, shifts it left by 16 and
; then logically right by 16 (which keeps only the low 16 bits of the i32),
; and stores the result through %out.
; The directives before this kernel include a negative match on 0xffff and
; expect the loaded value to be moved and stored directly; presumably the
; shift pair is expected to fold away -- confirm against the backend.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000141define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000142entry:
143 %size = call i32 @llvm.r600.read.local.size.x() #0
144 %shl = shl i32 %size, 16
145 %shr = lshr i32 %shl, 16
146 store i32 %shr, i32 addrspace(1)* %out
147 ret void
148}
149
150; FUNC-LABEL: {{^}}local_size_y_known_bits:
151; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
152; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
153; GCN-NOT: 0xffff
154; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
155; GCN-NEXT: buffer_store_dword [[VVAL]]
; Kernel that reads the y local-size intrinsic, shifts it left by 16 and
; then logically right by 16 (which keeps only the low 16 bits of the i32),
; and stores the result through %out.
; The directives before this kernel include a negative match on 0xffff and
; expect the loaded value to be moved and stored directly; presumably the
; shift pair is expected to fold away -- confirm against the backend.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000156define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000157entry:
158 %size = call i32 @llvm.r600.read.local.size.y() #0
159 %shl = shl i32 %size, 16
160 %shr = lshr i32 %shl, 16
161 store i32 %shr, i32 addrspace(1)* %out
162 ret void
163}
164
165; FUNC-LABEL: {{^}}local_size_z_known_bits:
166; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
167; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
168; GCN-NOT: 0xffff
169; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
170; GCN-NEXT: buffer_store_dword [[VVAL]]
; Kernel that reads the z local-size intrinsic, shifts it left by 16 and
; then logically right by 16 (which keeps only the low 16 bits of the i32),
; and stores the result through %out.
; The directives before this kernel include a negative match on 0xffff and
; expect the loaded value to be moved and stored directly; presumably the
; shift pair is expected to fold away -- confirm against the backend.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000171define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000172entry:
173 %size = call i32 @llvm.r600.read.local.size.z() #0
174 %shl = shl i32 %size, 16
175 %shr = lshr i32 %shl, 16
176 store i32 %shr, i32 addrspace(1)* %out
177 ret void
178}
179
; Declarations of the three local-size intrinsics called by the kernels in
; this file. Each takes no arguments, returns an i32, and carries attribute
; group #0.
180declare i32 @llvm.r600.read.local.size.x() #0
181declare i32 @llvm.r600.read.local.size.y() #0
182declare i32 @llvm.r600.read.local.size.z() #0
183
; Attribute group #0, applied to the intrinsic declarations and their call
; sites in this file.
184attributes #0 = { nounwind readnone }