; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
;
; Check-prefix layout used by the invocations below:
;   SI-STD             - tahiti with default flags, with -fp-contract=fast,
;                        and with -enable-unsafe-fp-math
;   SI-DENORM          - tahiti with +fp32-denormals and -fp-contract=fast
;   SI-DENORM-SLOWFMAF - verde with +fp32-denormals and -fp-contract=fast
; The SI and FUNC prefixes are enabled in every invocation.

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s

; Make sure we don't form mad with denormals
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s

; Intrinsics referenced by the tests in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Loads a, b, c from in[tid], in[tid+1], in[tid+2] and stores (a * b) + c to
; out[tid]. The standard runs expect a single v_mac_f32 accumulating into the
; register holding c; the denormal run expects v_fma_f32; the slow-fma denormal
; run expects a separate multiply and add.
; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]

; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF-NOT: v_fma
; SI-DENORM-SLOWFMAF-NOT: v_mad

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Per-workitem input and output slots.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Multiply feeding the first operand of the add.
  %mul = fmul float %a, %b
  %fma = fadd float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Same pattern as combine_to_mad_f32_0, but the product a * b has two users:
; it is added to c and to d, and both sums are stored (volatile) to out[tid]
; and out[tid+1].
; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs and two consecutive outputs per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply shared by both adds.
  %mul = fmul float %a, %b
  %fma0 = fadd float %mul, %c
  %fma1 = fadd float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
;
; Commuted form of combine_to_mad_f32_0: the product is the second operand of
; the add (c + (a * b)).
; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Per-workitem input and output slots.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Multiply feeding the second operand of the add.
  %mul = fmul float %a, %b
  %fma = fadd float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Computes (a * b) - c. The fused forms are expected to carry the negation as a
; source modifier on the addend.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Per-workitem input and output slots.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Product is the minuend.
  %mul = fmul float %a, %b
  %fma = fsub float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Two-user variant of combine_to_mad_fsub_0_f32: the product a * b is the
; minuend of two subtractions (by c and by d); both results are stored
; (volatile) to out[tid] and out[tid+1].
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs and two consecutive outputs per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply shared by both subtractions.
  %mul = fmul float %a, %b
  %fma0 = fsub float %mul, %c
  %fma1 = fsub float %mul, %d
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Computes c - (a * b). The fused forms are expected to carry the negation as a
; source modifier on the first multiplicand.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Per-workitem input and output slots.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Product is the subtrahend.
  %mul = fmul float %a, %b
  %fma = fsub float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Two-user variant of combine_to_mad_fsub_1_f32: the product a * b is
; subtracted from c and from d; both results are stored (volatile) to out[tid]
; and out[tid+1].
;
; The load of d (offset:12) is captured explicitly so that the [[D]] uses below
; refer to this function's register rather than to a capture left over from an
; earlier function in the file.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs and two consecutive outputs per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply shared by both subtractions, as the subtrahend.
  %mul = fmul float %a, %b
  %fma0 = fsub float %c, %mul
  %fma1 = fsub float %d, %mul
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Computes (-(a * b)) - c, where the negation is written as (fsub -0.0, mul).
; The fused forms are expected to negate both the first multiplicand and the
; addend via source modifiers.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]

; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Per-workitem input and output slots.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Negated product is the minuend.
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma = fsub float %mul.neg, %c

  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Variant of combine_to_mad_fsub_2_f32 where the negated product has two
; users: it is the minuend of subtractions by c and by d. Both results are
; stored (volatile) to out[tid] and out[tid+1].
;
; The load of d (offset:12) is captured explicitly so that the [[D]] uses below
; refer to this function's register rather than to a capture left over from an
; earlier function in the file.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT1:v[0-9]+]], -[[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs and two consecutive outputs per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; The negated product (not the raw product) is the value with two users.
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul.neg, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; (fsub (fmul x, y), z)        -> (fma x, y, (fneg z))
;
; Variant of combine_to_mad_fsub_2_f32 where the raw product has two users:
; its negation is the minuend of a subtraction by c, and the product itself is
; the minuend of a subtraction by d. Both results are stored (volatile) to
; out[tid] and out[tid+1].
;
; The load of d (offset:12) is captured explicitly so that the [[D]] uses below
; refer to this function's register rather than to a capture left over from an
; earlier function in the file.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs and two consecutive outputs per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; %mul is used both negated (for %fma0) and directly (for %fma1).
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
;
; Computes fma(x, y, u * v) - z using an explicit llvm.fma call. In the checks
; A..E are the loads at offsets 0..16, i.e. x, y, z, u, v in that order. Only
; the denormal run expects the multiply and subtract to be folded into a second
; fma; the other runs expect mul + fma + sub.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]

; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]

; The final value must be captured as RESULT (not RESULT1) so that the shared
; store check below compares against this function's result register.
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]

; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs and one output per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; fma(x, y, u * v) - z
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
  %tmp2 = fsub float %tmp1, %z

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fma y, z, (fmul u, v)))
;   -> (fma (fneg y), z, (fma (fneg u), v, x))
;
; Computes x - fma(y, z, u * v) using an explicit llvm.fma call. In the checks
; A..E are the loads at offsets 0..16, i.e. x, y, z, u, v in that order. Only
; the denormal run expects the fully folded pair of fmas; the other runs expect
; mul + fma + sub.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]

; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]

; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs and one output per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; x - fma(y, z, u * v)
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
  %tmp2 = fsub float %x, %tmp1

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}

; fold (fsub (fmuladd x, y, (fmul u, v)), z)
;   -> (fmuladd x, y, (fmuladd u, v, (fneg z)))
;
; Same shape as aggressive_combine_to_mad_fsub_0_f32, but the fused operation
; is written with llvm.fmuladd instead of llvm.fma. In the checks A..E are the
; loads at offsets 0..16, i.e. x, y, z, u, v in that order. The standard runs
; expect v_mad + v_mac, the denormal run expects two v_fma, and the slow-fma
; denormal run expects fully separate mul/mul/add/sub.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]

; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]

; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs and one output per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; fmuladd(x, y, u * v) - z
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
  %tmp2 = fsub float %tmp1, %z

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fmuladd y, z, (fmul u, v)))
;   -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
;
; Same shape as aggressive_combine_to_mad_fsub_1_f32, but the fused operation
; is written with llvm.fmuladd instead of llvm.fma. In the checks A..E are the
; loads at offsets 0..16, i.e. x, y, z, u, v in that order. The standard runs
; expect two v_mad, the denormal run expects two v_fma, and the slow-fma
; denormal run expects fully separate mul/mul/add/sub.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]

; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]

; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs and one output per workitem.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; x - fmuladd(y, z, u * v)
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
  %tmp2 = fsub float %x, %tmp1

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}

; #0 is used on the intrinsic declarations and their call sites; #1 is used on
; the test kernels themselves.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }