; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s

; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
; for correctness.
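;
; For illustration (an added note, not exercised by the checks below): under
; IEEE semantics, with x = +infinity and y = 0.0 the source form x * (y + 1)
; evaluates to inf * 1.0 = +infinity, while the combined form x * y + x
; evaluates to (inf * 0.0) + inf = NaN, so the rewrite is only sound when
; infinities can be ruled out (no-infs-fp-math).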

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #0
declare float @llvm.fma.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define amdgpu_kernel void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2
  %d = load volatile double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fadd double %mul, %c
  %fma1 = fadd double %mul, %d
  store volatile double %fma0, double addrspace(1)* %gep.out.0
  store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2
  %d = load volatile double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %mul, %c
  %fma1 = fsub double %mul, %d
  store volatile double %fma0, double addrspace(1)* %gep.out.0
  store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2
  %d = load volatile double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %c, %mul
  %fma1 = fsub double %d, %mul
  store volatile double %fma0, double addrspace(1)* %gep.out.0
  store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma = fsub double %mul.neg, %c

  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2
  %d = load volatile double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d

  store volatile double %fma0, double addrspace(1)* %gep.out.0
  store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0
  %b = load volatile double, double addrspace(1)* %gep.1
  %c = load volatile double, double addrspace(1)* %gep.2
  %d = load volatile double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d

  store volatile double %fma0, double addrspace(1)* %gep.out.0
  store volatile double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}

; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[TMP0]]
; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP1]], -[[Z]]

; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]

; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load volatile double, double addrspace(1)* %gep.0
  %y = load volatile double, double addrspace(1)* %gep.1
  %z = load volatile double, double addrspace(1)* %gep.2
  %u = load volatile double, double addrspace(1)* %gep.3
  %v = load volatile double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
  %tmp2 = fsub double %tmp1, %z

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fma y, z, (fmul u, v)))
; -> (fma (fneg y), z, (fma (fneg u), v, x))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}

; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]]
; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]]

; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]

; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load volatile double, double addrspace(1)* %gep.0
  %y = load volatile double, double addrspace(1)* %gep.1
  %z = load volatile double, double addrspace(1)* %gep.2
  %u = load volatile double, double addrspace(1)* %gep.3
  %v = load volatile double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
  %tmp2 = fsub double %x, %tmp1

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load volatile float, float addrspace(1)* %in1
  %y = load volatile float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load volatile float, float addrspace(1)* %in1
  %y = load volatile float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, -1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, -1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float 1.0, %x
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float 1.0, %x
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float -1.0, %x
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float -1.0, %x
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, 1.0
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, 1.0
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, -1.0
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, -1.0
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

; FUNC-LABEL: {{^}}test_f32_interp:
; SI-NOFMA: v_sub_f32_e32 [[VT1:v[0-9]]], 1.0, [[VT:v[0-9]]]
; SI-NOFMA: v_mul_f32_e32 [[VTY:v[0-9]]], [[VY:v[0-9]]], [[VT1]]
; SI-NOFMA: v_mac_f32_e32 [[VTY]], [[VX:v[0-9]]], [[VT]]
;
; SI-FMA: v_fma_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VT]], [[VR]]
define amdgpu_kernel void @test_f32_interp(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2, float addrspace(1)* %in3) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %t = load float, float addrspace(1)* %in3
  %t1 = fsub float 1.0, %t
  %tx = fmul float %x, %t
  %ty = fmul float %y, %t1
  %r = fadd float %tx, %ty
  store float %r, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f64_interp:
; SI-NOFMA: v_add_f64 [[VT1:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], 1.0
; SI-NOFMA: v_mul_f64 [[VTY:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VT1]]
; SI-NOFMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VTY]]
;
; SI-FMA: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
; SI-FMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
define amdgpu_kernel void @test_f64_interp(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %x = load double, double addrspace(1)* %in1
  %y = load double, double addrspace(1)* %in2
  %t = load double, double addrspace(1)* %in3
  %t1 = fsub double 1.0, %t
  %tx = fmul double %x, %t
  %ty = fmul double %y, %t1
  %r = fadd double %tx, %ty
  store double %r, double addrspace(1)* %out
  ret void
}

; Make sure the negative constant cancels out the fneg.
; GCN-LABEL: {{^}}fma_neg_2.0_neg_a_b_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN-NOT: [[A]]
; GCN-NOT: [[B]]
; GCN: v_fma_f32 v{{[0-9]+}}, [[A]], 2.0, [[B]]
define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fma.f32(float -2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; GCN-LABEL: {{^}}fma_2.0_neg_a_b_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN-NOT: [[A]]
; GCN-NOT: [[B]]
; GCN: v_fma_f32 v{{[0-9]+}}, [[A]], -2.0, [[B]]
define amdgpu_kernel void @fma_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fma.f32(float 2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }