; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
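; For example, reassociate_adds1 computes ((x0 + x1) + x2) + x3 in IR, and the CHECK
; lines expect it to be emitted as (x0 + x1) + (x2 + x3).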

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
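; The expected grouping here is (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7 rather
; than the fully balanced ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)).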

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
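; That is, (x2 + (x0 / x1)) + x3 should be emitted as (x0 / x1) + (x2 + x3), so the
; add of %x2 and %x3 can execute in parallel with the division.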

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}