; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s

; These tests check when an fmul followed by an fadd of a constant is selected
; as v_madak_f32, and when it is not.
; Check prefixes used below: GFX6 belongs to the invocation without -mcpu,
; GFX8 to -mcpu=tonga, GFX9 to -mcpu=gfx900; GFX8_9 is shared by the latter
; two and GCN by all three.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; Basic case: both multiplicands are loaded per work-item and the addend is the
; constant 10.0. The checks expect v_madak_f32 carrying the literal 0x41200000
; (the bit pattern of 10.0f). The expected load order differs between targets,
; hence the separate per-target check lines.
; GCN-LABEL: {{^}}madak_f32:
; GFX6: buffer_load_dword [[VA:v[0-9]+]]
; GFX6: buffer_load_dword [[VB:v[0-9]+]]
; GFX8: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GFX8: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX9: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX9: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4
  %b = load float, float addrspace(1)* %in.b.gep, align 4

  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; Make sure this is only folded with one use. This is a code size
; optimization and if we fold the immediate multiple times, we'll undo
; it.
;
; Two multiply-adds share the constant 10.0: the checks expect one v_madak_f32
; with the literal folded in, and one v_mac_f32 accumulating into a register
; that was initialised with the same constant by v_mov_b32.

; GCN-LABEL: {{^}}madak_2_use_f32:
; GFX8_9: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GFX6-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GFX6-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GFX6-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}
; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}
; GFX8_9: {{flat|global}}_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}
; GFX6-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
; GCN: s_endpgm
define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2

  %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  ; NOTE(review): %out.gep.1 is derived from %in.gep.0 rather than from %out,
  ; which looks like a copy-paste slip. Left as-is because changing the store
  ; address could perturb the instruction order the checks above rely on.
  %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %b = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %c = load volatile float, float addrspace(1)* %in.gep.2, align 4

  %mul0 = fmul float %a, %b
  %mul1 = fmul float %a, %c
  %madak0 = fadd float %mul0, 10.0
  %madak1 = fadd float %mul1, 10.0

  store volatile float %madak0, float addrspace(1)* %out.gep.0, align 4
  store volatile float %madak1, float addrspace(1)* %out.gep.1, align 4
  ret void
}

; One multiplicand is the constant 4.0 and the addend is 10.0. The checks
; expect v_madak_f32 with 4.0 printed directly as the first source operand and
; 0x41200000 (10.0f) as the trailing literal.
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4

  %mul = fmul float 4.0, %a
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; Make sure nothing weird happens with a value that is also allowed as
; an inline immediate.
;
; Here the addend is 4.0; the checks expect a plain v_mad_f32 with 4.0 as its
; last operand instead of v_madak_f32.

; GCN-LABEL: {{^}}madak_inline_imm_f32:
; GFX6: buffer_load_dword [[VA:v[0-9]+]]
; GFX6: buffer_load_dword [[VB:v[0-9]+]]
; GFX8: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GFX8: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX9: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX9: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4
  %b = load float, float addrspace(1)* %in.b.gep, align 4

  %mul = fmul float %a, %b
  %madak = fadd float %mul, 4.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; We can't use an SGPR when forming madak
;
; %b is a kernel argument (the checks expect it to arrive via s_load_dword) and
; %a is loaded per work-item. The checks require that no v_madak_f32 is
; emitted and that a v_mac_f32 accumulating into a register preloaded with
; 0x41200000 (10.0f) is used instead.
; GCN-LABEL: {{^}}s_v_madak_f32:
; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4

  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; Mirror of s_v_madak_f32 with the operand roles swapped: %a is the kernel
; argument and %b is loaded per work-item. The same instructions are expected:
; no v_madak_f32, and a v_mac_f32 accumulating into a register preloaded with
; 0x41200000 (10.0f).
; The label check is anchored with {{^}} and the trailing colon, matching the
; form used by every other test in this file.
; GCN-LABEL: {{^}}v_s_madak_f32:
; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %b = load float, float addrspace(1)* %in.b.gep, align 4

  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; Both multiplicands are kernel arguments. The checks require that no
; v_madak_f32 is emitted and expect a v_mac_f32 with one scalar source operand.
; GCN-LABEL: {{^}}s_s_madak_f32:
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out, align 4
  ret void
}

; The first multiplicand goes through llvm.fabs. The checks expect v_mad_f32
; with the |...| modifier on its first source operand and the addend supplied
; from a register.
; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
; GFX6: buffer_load_dword [[VA:v[0-9]+]]
; GFX6: buffer_load_dword [[VB:v[0-9]+]]
; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4
  %b = load float, float addrspace(1)* %in.b.gep, align 4

  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone

  %mul = fmul float %a.fabs, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; The second multiplicand goes through llvm.fabs. The checks expect v_mad_f32
; with the |...| modifier on its second source operand and the addend supplied
; from a register.
; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
; GFX6: buffer_load_dword [[VA:v[0-9]+]]
; GFX6: buffer_load_dword [[VB:v[0-9]+]]
; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %in.a.gep, align 4
  %b = load float, float addrspace(1)* %in.b.gep, align 4

  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone

  %mul = fmul float %a, %b.fabs
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out.gep, align 4
  ret void
}

; SIFoldOperands should not fold the SGPR copy into the instruction
; because the implicit immediate already uses the constant bus.
;
; The checks expect the scalar argument to be copied into a VGPR first and
; that copy, not the SGPR itself, to be the v_madak_f32 source operand.
; 0x42280000 is the bit pattern of 42.0f.
; GCN-LABEL: {{^}}madak_constant_bus_violation:
; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
; GCN: {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]]
; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
; GFX6: buffer_store_dword [[MUL]]
; GFX8_9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[MUL]]
define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, [8 x i32], float %sgpr0, float %sgpr1) #0 {
bb:
  %tmp = icmp eq i32 %arg1, 0
  br i1 %tmp, label %bb3, label %bb4

bb3:
  store volatile float 0.0, float addrspace(1)* undef
  br label %bb4

bb4:
  %vgpr = load volatile float, float addrspace(1)* undef
  %tmp0 = fmul float %sgpr0, 0.5
  %tmp1 = fadd float %tmp0, 42.0
  %tmp2 = fmul float %tmp1, %vgpr
  store volatile float %tmp2, float addrspace(1)* undef, align 4
  ret void
}

; Attribute group referenced as #0 by madak_constant_bus_violation.
attributes #0 = { nounwind }