; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; NOTE(review): the tonga invocation below is spelled XUN, so the test runner
; does not execute it; presumably it was disabled on purpose. Confirm before
; re-enabling.
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; FIXME: None of these trigger madmk emission anymore. It is still
; possible, but requires the correct registers to be used, which is
; hard to trigger.

; Intrinsics shared by the kernels in this file.
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; out[tid] = in[tid] * 10.0 + in[tid + 1], with both inputs loaded volatile.
; The checks expect the multiplier as the 32-bit literal 0x41200000 (10.0f)
; on a v_mac_f32_e32 that accumulates into the register holding in[tid + 1].
; GCN-LABEL: {{^}}madmk_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; The same product a * 10.0 feeds two adds (a * 10.0 + b and a * 10.0 + c).
; The checks expect the constant to be materialised once with v_mov_b32_e32
; and then used by two v_mac_f32_e32 instructions.
; GCN-LABEL: {{^}}madmk_2_use_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: v_mac_f32_e32 [[VB]], [[VA]], [[VK]]
; GCN-DAG: v_mac_f32_e32 [[VC]], [[VA]], [[VK]]
; GCN: s_endpgm
define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2

  %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  ; NOTE(review): %out.gep.1 is derived from %in.gep.0, not %out.gep.0, so the
  ; second store below writes to in[tid + 1]. This looks like a typo; it is
  ; left unchanged because altering the input could change the checked
  ; output. Confirm before fixing.
  %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %b = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %c = load volatile float, float addrspace(1)* %in.gep.2, align 4

  %mul0 = fmul float %a, 10.0
  %mul1 = fmul float %a, 10.0
  %madmk0 = fadd float %mul0, %b
  %madmk1 = fadd float %mul1, %c

  store float %madmk0, float addrspace(1)* %out.gep.0, align 4
  store float %madmk1, float addrspace(1)* %out.gep.1, align 4
  ret void
}

; We don't get any benefit if the constant is an inline immediate.
; Same shape as madmk_f32, but the multiplier is 4.0; the checks expect it to
; appear directly as the operand 4.0 of v_mac_f32_e32.
; GCN-LABEL: {{^}}madmk_inline_imm_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %mul = fmul float %a, 4.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; Both %a and %b are kernel arguments rather than values loaded from memory.
; The checks require that no v_madmk_f32 appears and that a v_mac_f32_e32 is
; emitted before the end of the program.
; GCN-LABEL: {{^}}s_s_madmk_f32:
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; The multiplicand %a is loaded from in[tid]; the addend %b is a kernel
; argument. The checks require that no v_madmk_f32 appears and that a
; v_mad_f32 is emitted.
; GCN-LABEL: {{^}}v_s_madmk_f32:
; GCN-NOT: v_madmk_f32
; GCN: v_mad_f32
; GCN: s_endpgm
define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep.0, align 4

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; The multiplicand %a is a kernel argument; the addend %b is loaded from
; in[tid]. The checks require that no v_madmk_f32 appears and that a
; v_mac_f32_e32 is emitted.
; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %b = load float, float addrspace(1)* %gep.0, align 4

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; fabs is applied to the multiplicand %a before the multiply-add. The checks
; expect a v_mad_f32 whose first source carries the |...| modifier, with the
; constant 0x41200000 (10.0f) moved into a register first.
; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[VK]], [[VB]]
define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone

  %mul = fmul float %a.fabs, 10.0
  %madmk = fadd float %mul, %b
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; fabs is applied to the addend %b before the multiply-add. The checks expect
; a v_mad_f32 whose last source operand carries the |...| modifier.
; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, %b.fabs
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; out[tid] = in[tid] * 10.0 + 2.0. The checks expect the multiplier
; 0x41200000 (10.0f) to be moved into a register and the addend to appear
; directly as the operand 2.0 of v_mad_f32.
; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[VK]], 2.0
define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load float, float addrspace(1)* %gep.0, align 4

  %mul = fmul float %a, 10.0
  %madmk = fadd float %mul, 2.0
  store float %madmk, float addrspace(1)* %out.gep, align 4
  ret void
}

; Loop whose back-edge value is (%tmp * undef) * K + undef, where K is the
; fmul constant 0x40E55DD180000000; the checked literal 0x472aee8c is that
; constant in single precision. Only the SI prefix is checked here.
; NOTE(review): going by the function name, this is presumably a regression
; test for a machine-verifier failure (the RUN line passes
; -verify-machineinstrs); confirm against the originating commit.
; SI-LABEL: {{^}}kill_madmk_verifier_error:
; SI: s_xor_b64
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
; SI: s_or_b64
define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
bb:
  br label %bb2

bb1:                                              ; preds = %bb2
  ret void

bb2:                                              ; preds = %bb6, %bb
  %tmp = phi float [ undef, %bb ], [ %tmp8, %bb6 ]
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
  %f_tid = bitcast i32 %tid to float
  %tmp3 = fsub float %f_tid, %tmp
  %tmp5 = fcmp oeq float %tmp3, 1.000000e+04
  br i1 %tmp5, label %bb1, label %bb6

bb6:                                              ; preds = %bb2
  %tmp4 = fmul float %tmp, undef
  %tmp7 = fmul float %tmp4, 0x40E55DD180000000
  %tmp8 = fadd float %tmp7, undef
  br label %bb2
}

; Intrinsic called from kill_madmk_verifier_error.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1

; Attribute group #1 is referenced by the mbcnt.lo declaration and its call site.
attributes #1 = { nounwind readnone }