; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s

; Intrinsic declarations shared by the kernels in this file.
; NOTE(review): llvm.fabs.f64 and llvm.fma.f32 are not referenced by any
; function visible in this portion of the file; kept in case they are used
; elsewhere.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #0
declare float @llvm.fma.f32(float, float, float) #0

; Multiply feeding the left operand of the add:
;   (fadd (fmul a, b), c) is expected to contract into one v_fma_f64 a, b, c.
; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out,
                                  double addrspace(1)* noalias %in) #1 {
  ; Per-work-item addressing: in[id], in[id + 1], in[id + 2] and out[id].
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr

  %prod = fmul double %a.val, %b.val
  %sum = fadd double %prod, %c.val
  store double %sum, double addrspace(1)* %dst
  ret void
}

; Same fold as combine_to_fma_f64_0, but the product has two users:
;   (fadd (fmul a, b), c) and (fadd (fmul a, b), d)
; Both adds are expected to become their own v_fma_f64.
; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out,
                                       double addrspace(1)* noalias %in) #1 {
  ; Four consecutive inputs starting at in[id]; two consecutive outputs at out[id].
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %d.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 3
  %dst0 = getelementptr double, double addrspace(1)* %out, i32 %id
  %dst1 = getelementptr double, double addrspace(1)* %dst0, i32 1

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr
  %d.val = load double, double addrspace(1)* %d.ptr

  ; One product shared by both adds.
  %prod = fmul double %a.val, %b.val
  %sum.c = fadd double %prod, %c.val
  %sum.d = fadd double %prod, %d.val
  store double %sum.c, double addrspace(1)* %dst0
  store double %sum.d, double addrspace(1)* %dst1
  ret void
}

; Multiply feeding the right operand of the add:
;   (fadd c, (fmul a, b)) is expected to contract into v_fma_f64 a, b, c.
; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out,
                                  double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr

  ; Addend comes first here; the product is the second fadd operand.
  %prod = fmul double %a.val, %b.val
  %sum = fadd double %c.val, %prod
  store double %sum, double addrspace(1)* %dst
  ret void
}

; Product minus a value:
;   (fsub (fmul a, b), c) is expected to become v_fma_f64 a, b, -c.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out,
                                       double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr

  %prod = fmul double %a.val, %b.val
  %diff = fsub double %prod, %c.val
  store double %diff, double addrspace(1)* %dst
  ret void
}

; Two-user variant of combine_to_fma_fsub_0_f64:
;   (fsub (fmul a, b), c) and (fsub (fmul a, b), d)
; Each subtraction is expected to become a v_fma_f64 with a negated addend.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out,
                                            double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %d.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 3
  %dst0 = getelementptr double, double addrspace(1)* %out, i32 %id
  %dst1 = getelementptr double, double addrspace(1)* %dst0, i32 1

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr
  %d.val = load double, double addrspace(1)* %d.ptr

  ; One product, subtracted from twice.
  %prod = fmul double %a.val, %b.val
  %diff.c = fsub double %prod, %c.val
  %diff.d = fsub double %prod, %d.val
  store double %diff.c, double addrspace(1)* %dst0
  store double %diff.d, double addrspace(1)* %dst1
  ret void
}

; Value minus a product:
;   (fsub c, (fmul a, b)) is expected to become v_fma_f64 -a, b, c.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out,
                                       double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr

  ; The product is the subtrahend in this variant.
  %prod = fmul double %a.val, %b.val
  %diff = fsub double %c.val, %prod
  store double %diff, double addrspace(1)* %dst
  ret void
}

; Two-user variant of combine_to_fma_fsub_1_f64:
;   (fsub c, (fmul a, b)) and (fsub d, (fmul a, b))
; Each is expected to become a v_fma_f64 with the first multiplicand negated.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out,
                                            double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %d.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 3
  %dst0 = getelementptr double, double addrspace(1)* %out, i32 %id
  %dst1 = getelementptr double, double addrspace(1)* %dst0, i32 1

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr
  %d.val = load double, double addrspace(1)* %d.ptr

  ; One product, subtracted from two different values.
  %prod = fmul double %a.val, %b.val
  %diff.c = fsub double %c.val, %prod
  %diff.d = fsub double %d.val, %prod
  store double %diff.c, double addrspace(1)* %dst0
  store double %diff.d, double addrspace(1)* %dst1
  ret void
}

; Negated product minus a value:
;   (fsub (fneg (fmul a, b)), c) is expected to become v_fma_f64 -a, b, -c.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out,
                                       double addrspace(1)* noalias %in) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %a.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %b.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 1
  %c.ptr = getelementptr double, double addrspace(1)* %a.ptr, i32 2
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %a.val = load double, double addrspace(1)* %a.ptr
  %b.val = load double, double addrspace(1)* %b.ptr
  %c.val = load double, double addrspace(1)* %c.ptr

  ; The negation is spelled as a subtraction from -0.0.
  %prod = fmul double %a.val, %b.val
  %prod.neg = fsub double -0.0, %prod
  %diff = fsub double %prod.neg, %c.val

  store double %diff, double addrspace(1)* %dst
  ret void
}

; The negated product has two users:
;   (fsub (fneg (fmul a, b)), c) and (fsub (fneg (fmul a, b)), d)
; Each is expected to become v_fma_f64 -a, b, -<addend>.
;
; The load of d (element 3, byte offset 24) is captured explicitly below.
; Without that capture, the [[D]] use in the second v_fma_f64 check would
; silently match whatever D an earlier function in this file last bound.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul.neg (a subtraction from -0.0) is the value with two users here.
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; The product itself has two users, one through a negation and one direct:
;   (fsub (fneg (fmul a, b)), c)  expected as v_fma_f64 -a, b, -c
;   (fsub (fmul a, b), d)         expected as v_fma_f64  a, b, -d
;
; The load of d (element 3, byte offset 24) is captured explicitly below.
; Without that capture, the [[D]] use in the second v_fma_f64 check would
; silently match whatever D an earlier function in this file last bound.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul feeds both the negation and the second subtraction directly.
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; An existing fma whose addend is a product, followed by a subtraction:
;   (fsub (fma x, y, (fmul u, v)), z)
; The checks expect two chained v_fma_f64: first u, v, -z, then x, y, <first>.

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out,
                                                  double addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs starting at in[id]; one output at out[id].
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %x.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %y.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 1
  %z.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 2
  %u.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 3
  %v.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 4
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %x.val = load double, double addrspace(1)* %x.ptr
  %y.val = load double, double addrspace(1)* %y.ptr
  %z.val = load double, double addrspace(1)* %z.ptr
  %u.val = load double, double addrspace(1)* %u.ptr
  %v.val = load double, double addrspace(1)* %v.ptr

  %uv = fmul double %u.val, %v.val
  %xy.plus.uv = call double @llvm.fma.f64(double %x.val, double %y.val, double %uv) #0
  %res = fsub double %xy.plus.uv, %z.val

  store double %res, double addrspace(1)* %dst
  ret void
}

; A value minus an existing fma whose addend is a product:
;   (fsub x, (fma y, z, (fmul u, v)))
; The checks expect two chained v_fma_f64: first -u, v, x, then -y, z, <first>.

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out,
                                                  double addrspace(1)* noalias %in) #1 {
  ; Five consecutive inputs starting at in[id]; one output at out[id].
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %x.ptr = getelementptr double, double addrspace(1)* %in, i32 %id
  %y.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 1
  %z.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 2
  %u.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 3
  %v.ptr = getelementptr double, double addrspace(1)* %x.ptr, i32 4
  %dst = getelementptr double, double addrspace(1)* %out, i32 %id

  %x.val = load double, double addrspace(1)* %x.ptr
  %y.val = load double, double addrspace(1)* %y.ptr
  %z.val = load double, double addrspace(1)* %z.ptr
  %u.val = load double, double addrspace(1)* %u.ptr
  %v.val = load double, double addrspace(1)* %v.ptr

  %uv = fmul double %u.val, %v.val
  %yz.plus.uv = call double @llvm.fma.f64(double %y.val, double %z.val, double %uv) #0
  %res = fsub double %x.val, %yz.plus.uv

  store double %res, double addrspace(1)* %dst
  ret void
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

; (x + 1.0) * y, with the sum as the first fmul operand.
; Expected to select one v_mac_f32_e32. VY is captured once and then reused,
; so the destination and the first source must be the same register; capturing
; it twice on one line would let the two operands differ.
; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; y * (x + 1.0), with the sum as the second fmul operand.
; Expected to select one v_mac_f32_e32. VY is captured once and then reused,
; so the destination and the first source must be the same register; capturing
; it twice on one line would let the two operands differ.
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
  ret void
}

; (x + -1.0) * y, sum as the first fmul operand.
; Expected as a single v_mad_f32 of the form x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less1 = fadd float %x.val, -1.0
  %prod = fmul float %x.less1, %y.val
  store float %prod, float addrspace(1)* %out
  ret void
}

; y * (x + -1.0), sum as the second fmul operand.
; Expected as a single v_mad_f32 of the form x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less1 = fadd float %x.val, -1.0
  %prod = fmul float %y.val, %x.less1
  store float %prod, float addrspace(1)* %out
  ret void
}

; (1.0 - x) * y, difference as the first fmul operand.
; Expected as a single v_mad_f32 of the form -x, y, y.
; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %one.less.x = fsub float 1.0, %x.val
  %prod = fmul float %one.less.x, %y.val
  store float %prod, float addrspace(1)* %out
  ret void
}

; y * (1.0 - x), difference as the second fmul operand.
; Expected as a single v_mad_f32 of the form -x, y, y.
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %one.less.x = fsub float 1.0, %x.val
  %prod = fmul float %y.val, %one.less.x
  store float %prod, float addrspace(1)* %out
  ret void
}

; (-1.0 - x) * y, difference as the first fmul operand.
; Expected as a single v_mad_f32 of the form -x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %negone.less.x = fsub float -1.0, %x.val
  %prod = fmul float %negone.less.x, %y.val
  store float %prod, float addrspace(1)* %out
  ret void
}

; y * (-1.0 - x), difference as the second fmul operand.
; Expected as a single v_mad_f32 of the form -x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %negone.less.x = fsub float -1.0, %x.val
  %prod = fmul float %y.val, %negone.less.x
  store float %prod, float addrspace(1)* %out
  ret void
}

; (x - 1.0) * y, difference as the first fmul operand.
; Expected as a single v_mad_f32 of the form x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less1 = fsub float %x.val, 1.0
  %prod = fmul float %x.less1, %y.val
  store float %prod, float addrspace(1)* %out
  ret void
}

; y * (x - 1.0), difference as the second fmul operand.
; Expected as a single v_mad_f32 of the form x, y, -y.
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less1 = fsub float %x.val, 1.0
  %prod = fmul float %y.val, %x.less1
  store float %prod, float addrspace(1)* %out
  ret void
}

; (x - -1.0) * y, difference as the first fmul operand.
; Expected as a single v_mac_f32_e32 whose destination is also its first source.
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less.negone = fsub float %x.val, -1.0
  %prod = fmul float %x.less.negone, %y.val
  store float %prod, float addrspace(1)* %out
  ret void
}

; y * (x - -1.0), difference as the second fmul operand.
; Expected as a single v_mac_f32_e32 whose destination is also its first source.
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %x.less.negone = fsub float %x.val, -1.0
  %prod = fmul float %y.val, %x.less.negone
  store float %prod, float addrspace(1)* %out
  ret void
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

; Single-precision interpolation: x * t + y * (1.0 - t).
; Expected as a v_mad_f32 of the form -t, y, y, followed by a v_mac_f32_e32
; that accumulates t, x into the same result register.
; FUNC-LABEL: {{^}}test_f32_interp:
; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]]
define void @test_f32_interp(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2, float addrspace(1)* %in3) {
  %x.val = load float, float addrspace(1)* %in1
  %y.val = load float, float addrspace(1)* %in2
  %t.val = load float, float addrspace(1)* %in3
  %one.less.t = fsub float 1.0, %t.val
  %x.part = fmul float %x.val, %t.val
  %y.part = fmul float %y.val, %one.less.t
  %blend = fadd float %x.part, %y.part
  store float %blend, float addrspace(1)* %out
  ret void
}

; Double-precision interpolation: x * t + y * (1.0 - t).
; Expected as two v_fma_f64: first -t, y, y, then x, t, <accumulator>.
; NOTE(review): the second check line captures VR again instead of reusing
; the first capture, so it only ties its own destination to its own addend
; and does not reference the first fma's result. Left as-is; confirm intent.
; FUNC-LABEL: {{^}}test_f64_interp:
; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
define void @test_f64_interp(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %x.val = load double, double addrspace(1)* %in1
  %y.val = load double, double addrspace(1)* %in2
  %t.val = load double, double addrspace(1)* %in3
  %one.less.t = fsub double 1.0, %t.val
  %x.part = fmul double %x.val, %t.val
  %y.part = fmul double %y.val, %one.less.t
  %blend = fadd double %x.part, %y.part
  store double %blend, double addrspace(1)* %out
  ret void
}

; #0 is attached to the intrinsic declarations and to the calls of them;
; #1 is attached to the f64 combine kernels that take noalias pointers.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }