; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
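; In effect, the serial chain
;   %t2 = ((%x0 + %x1) + %x2) + %x3
; is rebalanced to
;   %t2 = (%x0 + %x1) + (%x2 + %x3)
; so the first two adds have no dependence on each other (see the CHECK lines below).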

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
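; For the 8-operand chain below, the CHECK lines correspond roughly to
;   (((%x0 + %x1) + (%x2 + %x3)) + ((%x4 + %x5) + %x6)) + %x7
; which is shallower than the original serial chain but not the fully balanced
;   ((%x0 + %x1) + (%x2 + %x3)) + ((%x4 + %x5) + (%x6 + %x7)).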

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
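; In effect, %x3 + (%x2 + (%x0 / %x1)) becomes (%x0 / %x1) + (%x2 + %x3): the
; independent add can execute while the divide is in flight, and the divide
; result feeds only the final add (see the CHECK lines below).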

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # BB#0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # BB#0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}