; blob: bd574b877117ec33c640950d847f1eda97b93970
;
; Checks that, with -fp-contract=fast on the llc command line, separate
; fmul/fadd/fsub instructions are selected as v_fma_f64. The test is run
; for two -mcpu values (tahiti and verde), each with its own extra check
; prefix in addition to the shared ones.
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s

; Intrinsics referenced by the test functions. All are declared with
; attribute group #0.
declare i32 @llvm.r600.read.tidig.x() #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #0
declare float @llvm.fma.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Loads three consecutive doubles a, b, c starting at %in[tid], computes
; (a * b) + c with separate fmul/fadd, and stores the result to %out[tid].
; The checks expect a single v_fma_f64 with operands a, b, c whose result
; is the stored value.
; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  ; Addresses of the three inputs (8 bytes apart) and of the output slot.
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  ; The multiply is the left operand of the add.
  %mul = fmul double %a, %b
  %fma = fadd double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Same pattern as the single-use case, but the fmul result feeds two
; fadds (with c and with d). The checks expect one v_fma_f64 per fadd,
; both using a and b directly, with results stored to %out[tid] and
; %out[tid + 1].
; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  ; Four consecutive inputs and two consecutive outputs.
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul has two users; each add is expected to become its own fma.
  %mul = fmul double %a, %b
  %fma0 = fadd double %mul, %c
  %fma1 = fadd double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
;
; Commuted form of the basic fadd case: the fmul is the right operand of
; the fadd. The checks expect the same v_fma_f64 a, b, c.
; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  ; The multiply is the right operand of the add here.
  %mul = fmul double %a, %b
  %fma = fadd double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Computes (a * b) - c. The checks expect the subtraction to be folded
; into a v_fma_f64 whose addend operand carries a negate modifier.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  ; The multiply is the minuend of the subtraction.
  %mul = fmul double %a, %b
  %fma = fsub double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Two-use variant: the fmul result is the minuend of two fsubs (minus c
; and minus d). The checks expect two v_fma_f64 instructions, each with a
; negated addend, stored to %out[tid] and %out[tid + 1].
; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul has two users; each subtraction is expected to become its own fma.
  %mul = fmul double %a, %b
  %fma0 = fsub double %mul, %c
  %fma1 = fsub double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Computes c - (a * b). The checks expect a v_fma_f64 with a negate
; modifier on the first multiplicand and c as the addend.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  ; The multiply is the subtrahend of the subtraction.
  %mul = fmul double %a, %b
  %fma = fsub double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Two-use variant: the fmul result is subtracted from both c and d. The
; checks expect two v_fma_f64 instructions, each negating the first
; multiplicand, stored to %out[tid] and %out[tid + 1].
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul is the subtrahend of both subtractions.
  %mul = fmul double %a, %b
  %fma0 = fsub double %c, %mul
  %fma1 = fsub double %d, %mul
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Computes -(a * b) - c, where the negation is written as
; (fsub -0.0, %mul). The checks expect a single v_fma_f64 with negate
; modifiers on both the first multiplicand and the addend.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  ; Negation of the product, expressed as a subtraction from -0.0.
  %mul.neg = fsub double -0.0, %mul
  %fma = fsub double %mul.neg, %c

  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Variant where the negated product has two users: -(a * b) - c and
; -(a * b) - d. The checks expect two v_fma_f64 instructions, each with
; negate modifiers on the first multiplicand and on the addend.
;
; D is captured by its own load check below (the load of %d at offset 24)
; so that the second fma check does not rely on a value captured while
; matching a different function.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  ; The negated product (not the product itself) is the value with two users.
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Variant where the product itself has two users: its negation feeds
; -(a * b) - c, and the un-negated product feeds (a * b) - d. The checks
; expect two v_fma_f64 instructions: the first negates the first
; multiplicand and the addend, the second negates only the addend.
;
; D is captured by its own load check below (the load of %d at offset 24)
; so that the second fma check does not rely on a value captured while
; matching a different function.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  ; %mul is used both by the negation and directly by %fma1.
  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
;
; Loads five consecutive doubles x, y, z, u, v starting at %in[tid] and
; computes fma(x, y, u * v) - z using an explicit @llvm.fma.f64 call. The
; checks expect two chained v_fma_f64 instructions: the inner one
; computes u * v - z, and the outer one uses that result as its addend.

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  ; The product u * v is the addend of the explicit fma; z is then
  ; subtracted from the fma result.
  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
  %tmp2 = fsub double %tmp1, %z

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fma y, z, (fmul u, v)))
;   -> (fma (fneg y), z, (fma (fneg u), v, x))
;
; Loads five consecutive doubles x, y, z, u, v starting at %in[tid] and
; computes x - fma(y, z, u * v) using an explicit @llvm.fma.f64 call. The
; checks expect two chained v_fma_f64 instructions: the inner one
; computes x - u * v, and the outer one negates y and uses the inner
; result as its addend.

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  ; The product u * v is the addend of the explicit fma; the fma result
  ; is then subtracted from x.
  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
  %tmp2 = fsub double %x, %tmp1

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

; #0 is used by the intrinsic declarations and their call sites;
; #1 is used by the test function definitions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }