; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s

; Make sure we don't form mad with denormals
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s

; Intrinsic declarations available to the test functions below.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]

; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF-NOT: v_fma
; SI-DENORM-SLOWFMAF-NOT: v_mad

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]

; Test input: out[tid] = (in[tid] * in[tid+1]) + in[tid+2], written as a
; separate fmul and fadd with the product as the first fadd operand.
; The loads are volatile (presumably so that each one stays a distinct
; buffer_load_dword that the checks above can bind to A, B and C).
define amdgpu_kernel void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  %mul = fmul float %a, %b
  %fma = fadd float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mac_f32_e32 [[C]], [[A]], [[B]]
; SI-STD-DAG: v_mac_f32_e32 [[D]], [[A]], [[B]]

; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm

; Test input: one product in[tid] * in[tid+1] feeds two fadds, one with
; in[tid+2] and one with in[tid+3]. Both sums are stored with volatile
; stores, to out[tid] and out[tid+1] respectively.
define amdgpu_kernel void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  %mul = fmul float %a, %b
  %fma0 = fadd float %mul, %c
  %fma1 = fadd float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]

; Test input: out[tid] = in[tid+2] + (in[tid] * in[tid+1]), i.e. the product
; is the second operand of the fadd.
define amdgpu_kernel void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  %mul = fmul float %a, %b
  %fma = fadd float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]

; Test input: out[tid] = (in[tid] * in[tid+1]) - in[tid+2], with the product
; as the minuend of the fsub.
define amdgpu_kernel void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  %mul = fmul float %a, %b
  %fma = fsub float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm

; Test input: one product in[tid] * in[tid+1] is the minuend of two fsubs,
; subtracting in[tid+2] and in[tid+3]. The differences are stored with
; volatile stores to out[tid] and out[tid+1].
define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  %mul = fmul float %a, %b
  %fma0 = fsub float %mul, %c
  %fma1 = fsub float %mul, %d
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI: buffer_store_dword [[RESULT]]

; Test input: out[tid] = in[tid+2] - (in[tid] * in[tid+1]), with the product
; as the subtrahend of the fsub.
define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  %mul = fmul float %a, %b
  %fma = fsub float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm

; Test input: one product in[tid] * in[tid+1] is the subtrahend of two fsubs,
; subtracted from in[tid+2] and from in[tid+3]. The differences are stored
; with volatile stores to out[tid] and out[tid+1].
define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  %mul = fmul float %a, %b
  %fma0 = fsub float %c, %mul
  %fma1 = fsub float %d, %mul
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]

; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]

; Test input: out[tid] = (-(in[tid] * in[tid+1])) - in[tid+2]. The negation
; of the product is written as an fsub from -0.0.
define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma = fsub float %mul.neg, %c

  store float %fma, float addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]]

; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm

; Test input: the negated product -(in[tid] * in[tid+1]) has two uses. It is
; the minuend of two fsubs, subtracting in[tid+2] and in[tid+3]. The product
; itself is only used by the negation. Results go to out[tid] and out[tid+1]
; via volatile stores.
define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul.neg, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm

; Test input: the product in[tid] * in[tid+1] has two uses. Its negation is
; the minuend of an fsub with in[tid+2], and the product itself is the
; minuend of an fsub with in[tid+3]. Results go to out[tid] and out[tid+1]
; via volatile stores.
define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]

; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]

; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}

; Test input: out[tid] = fma(x, y, u * v) - z, where x, y, z, u, v are the
; five consecutive floats starting at in[tid] and the fma is an explicit
; llvm.fma.f32 call. The checks above expect the mul, fma and sub to remain
; three separate instructions in both the standard and the denormal runs.
define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
  %tmp2 = fsub float %tmp1, %z

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}

433; fold (fsub x, (fma y, z, (fmul u, v)))
434; -> (fma (fneg y), z, (fma (fneg u), v, x))
435
436; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
437; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
438; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
439; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
440; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
441; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
442
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000443; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000444; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000445; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000446
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000447; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000448; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000449; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000450
451; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
452; SI: s_endpgm
; Kernel under test: loads five consecutive floats starting at %in[%tid]
; (x, y, z, u, v) and stores x - fma(y, z, u * v) to %out[%tid].
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000453define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
; The work-item id in x is the element index used for both %in and %out.
Matt Arsenault9c47dd52016-02-11 06:02:01 +0000454 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
David Blaikie79e6c742015-02-27 19:29:02 +0000455 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
; %gep.1 .. %gep.4 address the four floats that follow %gep.0.
456 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
457 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
458 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
459 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
460 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
Matt Arsenault8d630032015-02-20 22:10:41 +0000461
; The five inputs are loaded with volatile loads (presumably so they stay as
; separate, ordered dword loads for the load checks above; TODO confirm).
Matt Arsenault44e54832016-04-12 13:38:18 +0000462 %x = load volatile float, float addrspace(1)* %gep.0
463 %y = load volatile float, float addrspace(1)* %gep.1
464 %z = load volatile float, float addrspace(1)* %gep.2
465 %u = load volatile float, float addrspace(1)* %gep.3
466 %v = load volatile float, float addrspace(1)* %gep.4
Matt Arsenault8d630032015-02-20 22:10:41 +0000467
; u * v is the addend of the llvm.fma call; the fsub then subtracts that
; fma result from x (x is the left-hand operand here).
468 %tmp0 = fmul float %u, %v
469 %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
470 %tmp2 = fsub float %x, %tmp1
471
472 store float %tmp2, float addrspace(1)* %gep.out
473 ret void
474}
475
476; fold (fsub (fmuladd x, y, (fmul u, v)), z) -> (fmuladd x, y, (fmuladd u, v, (fneg z)))
477
478; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
479; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
480; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
481; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
482; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
483; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
484
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000485; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
486; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[A]], [[B]]
487; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[C]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000488
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000489; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000490; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[A]], [[B]]
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000491
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000492; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000493; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000494; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000495
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000496; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
497; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[A]], [[B]]
498; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
499; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[C]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000500
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000501; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
Matt Arsenault8d630032015-02-20 22:10:41 +0000502; SI: s_endpgm
; Kernel under test: loads five consecutive floats starting at %in[%tid]
; (x, y, z, u, v) and stores fmuladd(x, y, u * v) - z to %out[%tid].
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000503define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
; The work-item id in x is the element index used for both %in and %out.
Matt Arsenault9c47dd52016-02-11 06:02:01 +0000504 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
David Blaikie79e6c742015-02-27 19:29:02 +0000505 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
; %gep.1 .. %gep.4 address the four floats that follow %gep.0.
506 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
507 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
508 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
509 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
510 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
Matt Arsenault8d630032015-02-20 22:10:41 +0000511
; The five inputs are loaded with volatile loads (presumably so they stay as
; separate, ordered dword loads for the load checks above; TODO confirm).
Matt Arsenault44e54832016-04-12 13:38:18 +0000512 %x = load volatile float, float addrspace(1)* %gep.0
513 %y = load volatile float, float addrspace(1)* %gep.1
514 %z = load volatile float, float addrspace(1)* %gep.2
515 %u = load volatile float, float addrspace(1)* %gep.3
516 %v = load volatile float, float addrspace(1)* %gep.4
Matt Arsenault8d630032015-02-20 22:10:41 +0000517
; u * v is the addend of the llvm.fmuladd call (note: fmuladd, not fma);
; the fsub then subtracts z from that result (z is the right-hand operand).
518 %tmp0 = fmul float %u, %v
519 %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
520 %tmp2 = fsub float %tmp1, %z
521
522 store float %tmp2, float addrspace(1)* %gep.out
523 ret void
524}
525
526; fold (fsub x, (fmuladd y, z, (fmul u, v)))
527; -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
528
529; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
530; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
531; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
532; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
533; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
534; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
535
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000536; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
537; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[C]]
538; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP0]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000539
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000540; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
541; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
542
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000543; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
Nicolai Haehnle8813d5d2017-01-31 14:35:37 +0000544; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000545; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000546
Matt Arsenault6c29c5a2017-07-10 19:53:57 +0000547; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
548; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[C]]
549; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
550; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP2]]
Matt Arsenault8d630032015-02-20 22:10:41 +0000551
552; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
553; SI: s_endpgm
; Kernel under test: loads five consecutive floats starting at %in[%tid]
; (x, y, z, u, v) and stores x - fmuladd(y, z, u * v) to %out[%tid].
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000554define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
; The work-item id in x is the element index used for both %in and %out.
Matt Arsenault9c47dd52016-02-11 06:02:01 +0000555 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
David Blaikie79e6c742015-02-27 19:29:02 +0000556 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
; %gep.1 .. %gep.4 address the four floats that follow %gep.0.
557 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
558 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
559 %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
560 %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
561 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
Matt Arsenault8d630032015-02-20 22:10:41 +0000562
; The five inputs are loaded with volatile loads (presumably so they stay as
; separate, ordered dword loads for the load checks above; TODO confirm).
Matt Arsenault44e54832016-04-12 13:38:18 +0000563 %x = load volatile float, float addrspace(1)* %gep.0
564 %y = load volatile float, float addrspace(1)* %gep.1
565 %z = load volatile float, float addrspace(1)* %gep.2
566 %u = load volatile float, float addrspace(1)* %gep.3
567 %v = load volatile float, float addrspace(1)* %gep.4
Matt Arsenault8d630032015-02-20 22:10:41 +0000568
; u * v is the addend of the llvm.fmuladd call; the fsub then subtracts
; that result from x (x is the left-hand operand here).
569 %tmp0 = fmul float %u, %v
570 %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
571 %tmp2 = fsub float %x, %tmp1
572
573 store float %tmp2, float addrspace(1)* %gep.out
574 ret void
575}
576
; Attribute group #0: referenced by the intrinsic declarations and by the
; intrinsic call sites inside the kernels.
577attributes #0 = { nounwind readnone }
; Attribute group #1: referenced by the amdgpu_kernel definitions.
578attributes #1 = { nounwind }