blob: ea40e4edd3483b45486e7e22728bdf794b576115 [file] [log] [blame]
Sanjay Patele7532d22018-05-03 17:41:37 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; REQUIRES: asserts
Sanjay Patel52151882018-05-03 18:49:35 +00003; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
4; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
Sanjay Patel4c8a67a2018-05-14 21:17:49 +00005; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
6; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL
Sanjay Patele7532d22018-05-03 17:41:37 +00007
8; Test FP transforms using instruction/node-level fast-math-flags.
9; We're also checking debug output to verify that fast-math-flags (FMF) are propagated to the newly created nodes.
10; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
11
12declare float @llvm.fma.f32(float, float, float)
13declare float @llvm.sqrt.f32(float)
14
15; X * Y + Z --> fma(X, Y, Z)
16
17; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
Michael Berg8e570c32018-06-16 00:03:06 +000018; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +000019; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
20
21define float @fmul_fadd_contract1(float %x, float %y, float %z) {
22; FMF-LABEL: fmul_fadd_contract1:
23; FMF: # %bb.0:
Michael Berg8e570c32018-06-16 00:03:06 +000024; FMF-NEXT: xsmaddasp 3, 1, 2
25; FMF-NEXT: fmr 1, 3
Sanjay Patele7532d22018-05-03 17:41:37 +000026; FMF-NEXT: blr
27;
28; GLOBAL-LABEL: fmul_fadd_contract1:
29; GLOBAL: # %bb.0:
30; GLOBAL-NEXT: xsmaddasp 3, 1, 2
31; GLOBAL-NEXT: fmr 1, 3
32; GLOBAL-NEXT: blr
; IR under test: an unflagged fmul feeding an fadd that carries only 'contract'.
; Both the FMF run and the GLOBAL run expect one fused xsmaddasp, then an fmr
; copying register 3 into register 1 before returning.
33 %mul = fmul float %x, %y
34 %add = fadd contract float %mul, %z
35 ret float %add
36}
37
38; This shouldn't change anything - the intermediate fmul result is now also flagged.
39
40; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
Michael Berg77b5be72018-06-07 22:49:09 +000041; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +000042; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
43
44define float @fmul_fadd_contract2(float %x, float %y, float %z) {
45; FMF-LABEL: fmul_fadd_contract2:
46; FMF: # %bb.0:
47; FMF-NEXT: xsmaddasp 3, 1, 2
48; FMF-NEXT: fmr 1, 3
49; FMF-NEXT: blr
50;
51; GLOBAL-LABEL: fmul_fadd_contract2:
52; GLOBAL: # %bb.0:
53; GLOBAL-NEXT: xsmaddasp 3, 1, 2
54; GLOBAL-NEXT: fmr 1, 3
55; GLOBAL-NEXT: blr
; IR under test: both the fmul and the fadd carry 'contract'.
; The expected assembly is the same in both runs: one xsmaddasp, an fmr of
; register 3 into register 1, and the return.
56 %mul = fmul contract float %x, %y
57 %add = fadd contract float %mul, %z
58 ret float %add
59}
60
61; Reassociation implies that FMA contraction is allowed.
62
63; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
Michael Berg8e570c32018-06-16 00:03:06 +000064; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +000065; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
66
67define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
68; FMF-LABEL: fmul_fadd_reassoc1:
69; FMF: # %bb.0:
Michael Berg8e570c32018-06-16 00:03:06 +000070; FMF-NEXT: xsmaddasp 3, 1, 2
71; FMF-NEXT: fmr 1, 3
Sanjay Patele7532d22018-05-03 17:41:37 +000072; FMF-NEXT: blr
73;
74; GLOBAL-LABEL: fmul_fadd_reassoc1:
75; GLOBAL: # %bb.0:
76; GLOBAL-NEXT: xsmaddasp 3, 1, 2
77; GLOBAL-NEXT: fmr 1, 3
78; GLOBAL-NEXT: blr
; IR under test: an unflagged fmul feeding an fadd that carries only 'reassoc'.
; Both runs expect the multiply and add to be fused into one xsmaddasp.
79 %mul = fmul float %x, %y
80 %add = fadd reassoc float %mul, %z
81 ret float %add
82}
83
84; This shouldn't change anything - the intermediate fmul result is now also flagged.
85
86; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
Michael Berg77b5be72018-06-07 22:49:09 +000087; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +000088; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
89
90define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
91; FMF-LABEL: fmul_fadd_reassoc2:
92; FMF: # %bb.0:
Michael Berg7acc81b2018-05-04 18:48:20 +000093; FMF-NEXT: xsmaddasp 3, 1, 2
94; FMF-NEXT: fmr 1, 3
Sanjay Patele7532d22018-05-03 17:41:37 +000095; FMF-NEXT: blr
96;
97; GLOBAL-LABEL: fmul_fadd_reassoc2:
98; GLOBAL: # %bb.0:
99; GLOBAL-NEXT: xsmaddasp 3, 1, 2
100; GLOBAL-NEXT: fmr 1, 3
101; GLOBAL-NEXT: blr
; IR under test: both the fmul and the fadd carry 'reassoc'.
; The expected assembly is the same in both runs: one fused xsmaddasp.
102 %mul = fmul reassoc float %x, %y
103 %add = fadd reassoc float %mul, %z
104 ret float %add
105}
106
107; The fadd is now fully 'fast'. This implies that contraction is allowed.
108
109; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
Michael Berg77b5be72018-06-07 22:49:09 +0000110; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000111; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
112
113define float @fmul_fadd_fast1(float %x, float %y, float %z) {
114; FMF-LABEL: fmul_fadd_fast1:
115; FMF: # %bb.0:
116; FMF-NEXT: xsmaddasp 3, 1, 2
117; FMF-NEXT: fmr 1, 3
118; FMF-NEXT: blr
119;
120; GLOBAL-LABEL: fmul_fadd_fast1:
121; GLOBAL: # %bb.0:
122; GLOBAL-NEXT: xsmaddasp 3, 1, 2
123; GLOBAL-NEXT: fmr 1, 3
124; GLOBAL-NEXT: blr
; IR under test: only the fadd is 'fast'; the fmul is deliberately left
; unflagged so that this case differs from fmul_fadd_fast2 (where the fmul is
; flagged as well) and mirrors the contract1/reassoc1 variants.
; NOTE(review): the fmul previously carried 'fast' too, which made this test a
; duplicate of fmul_fadd_fast2. The expected output should be unchanged, but
; re-run utils/update_llc_test_checks.py to confirm.
125 %mul = fmul float %x, %y
126 %add = fadd fast float %mul, %z
127 ret float %add
128}
129
130; This shouldn't change anything - the intermediate fmul result is now also flagged.
131
132; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
Michael Berg77b5be72018-06-07 22:49:09 +0000133; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000134; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
135
136define float @fmul_fadd_fast2(float %x, float %y, float %z) {
137; FMF-LABEL: fmul_fadd_fast2:
138; FMF: # %bb.0:
139; FMF-NEXT: xsmaddasp 3, 1, 2
140; FMF-NEXT: fmr 1, 3
141; FMF-NEXT: blr
142;
143; GLOBAL-LABEL: fmul_fadd_fast2:
144; GLOBAL: # %bb.0:
145; GLOBAL-NEXT: xsmaddasp 3, 1, 2
146; GLOBAL-NEXT: fmr 1, 3
147; GLOBAL-NEXT: blr
; IR under test: both the fmul and the fadd are fully 'fast'.
; Both runs expect one fused xsmaddasp followed by an fmr of register 3 into
; register 1.
148 %mul = fmul fast float %x, %y
149 %add = fadd fast float %mul, %z
150 ret float %add
151}
152
153; fma(X, 7.0, X * 42.0) --> X * 49.0
154; This is the minimum FMF needed for this transform - the FMA allows reassociation.
155
156; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000157; FMFDEBUG: fmul reassoc {{t[0-9]+}},
Sanjay Patele7532d22018-05-03 17:41:37 +0000158; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
159
Sanjay Patel52151882018-05-03 18:49:35 +0000160; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
Michael Berg7acc81b2018-05-04 18:48:20 +0000161; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000162; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
163
Sanjay Patele7532d22018-05-03 17:41:37 +0000164define float @fmul_fma_reassoc1(float %x) {
165; FMF-LABEL: fmul_fma_reassoc1:
166; FMF: # %bb.0:
167; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000168; FMF-NEXT: lfs 0, .LCPI6_0@toc@l(3)
Michael Berg8e570c32018-06-16 00:03:06 +0000169; FMF-NEXT: xsmulsp 1, 1, 0
Sanjay Patele7532d22018-05-03 17:41:37 +0000170; FMF-NEXT: blr
171;
172; GLOBAL-LABEL: fmul_fma_reassoc1:
173; GLOBAL: # %bb.0:
174; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000175; GLOBAL-NEXT: lfs 0, .LCPI6_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000176; GLOBAL-NEXT: xsmulsp 1, 1, 0
177; GLOBAL-NEXT: blr
; IR under test: fma(x, 7.0, x * 42.0) where only the fma call carries
; 'reassoc'; the inner fmul is unflagged.
; Both runs expect the whole expression to collapse to a single xsmulsp of x
; by one constant loaded through .LCPI6_0.
178 %mul = fmul float %x, 42.0
179 %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
180 ret float %fma
181}
182
183; This shouldn't change anything - the intermediate fmul result is now also flagged.
184
185; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
Michael Berg77b5be72018-06-07 22:49:09 +0000186; FMFDEBUG: fmul reassoc {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000187; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
188
Sanjay Patel52151882018-05-03 18:49:35 +0000189; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
Michael Berg7acc81b2018-05-04 18:48:20 +0000190; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000191; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
192
Sanjay Patele7532d22018-05-03 17:41:37 +0000193define float @fmul_fma_reassoc2(float %x) {
194; FMF-LABEL: fmul_fma_reassoc2:
195; FMF: # %bb.0:
196; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000197; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3)
Michael Berg8e570c32018-06-16 00:03:06 +0000198; FMF-NEXT: xsmulsp 1, 1, 0
Sanjay Patele7532d22018-05-03 17:41:37 +0000199; FMF-NEXT: blr
200;
201; GLOBAL-LABEL: fmul_fma_reassoc2:
202; GLOBAL: # %bb.0:
203; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000204; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000205; GLOBAL-NEXT: xsmulsp 1, 1, 0
206; GLOBAL-NEXT: blr
; IR under test: fma(x, 7.0, x * 42.0) where both the inner fmul and the fma
; call carry 'reassoc'.
; Both runs expect a single xsmulsp of x by one constant loaded through
; .LCPI7_0.
207 %mul = fmul reassoc float %x, 42.0
208 %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
209 ret float %fma
210}
211
212; The FMA is now fully 'fast'. This implies that reassociation is allowed.
213
214; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
Michael Berg8e570c32018-06-16 00:03:06 +0000215; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000216; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
217
Sanjay Patel52151882018-05-03 18:49:35 +0000218; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
Michael Berg77b5be72018-06-07 22:49:09 +0000219; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000220; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
221
Sanjay Patele7532d22018-05-03 17:41:37 +0000222define float @fmul_fma_fast1(float %x) {
223; FMF-LABEL: fmul_fma_fast1:
224; FMF: # %bb.0:
225; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000226; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3)
Michael Berg8e570c32018-06-16 00:03:06 +0000227; FMF-NEXT: xsmulsp 1, 1, 0
Sanjay Patele7532d22018-05-03 17:41:37 +0000228; FMF-NEXT: blr
229;
230; GLOBAL-LABEL: fmul_fma_fast1:
231; GLOBAL: # %bb.0:
232; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000233; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000234; GLOBAL-NEXT: xsmulsp 1, 1, 0
235; GLOBAL-NEXT: blr
; IR under test: fma(x, 7.0, x * 42.0) where only the fma call is 'fast'; the
; inner fmul is unflagged.
; Both runs expect a single xsmulsp of x by one constant loaded through
; .LCPI8_0.
236 %mul = fmul float %x, 42.0
237 %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
238 ret float %fma
239}
240
241; This shouldn't change anything - the intermediate fmul result is now also flagged.
242
243; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
Michael Berg77b5be72018-06-07 22:49:09 +0000244; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000245; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
246
Sanjay Patel52151882018-05-03 18:49:35 +0000247; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
Michael Berg77b5be72018-06-07 22:49:09 +0000248; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000249; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
250
Sanjay Patele7532d22018-05-03 17:41:37 +0000251define float @fmul_fma_fast2(float %x) {
252; FMF-LABEL: fmul_fma_fast2:
253; FMF: # %bb.0:
254; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000255; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3)
Michael Berg8e570c32018-06-16 00:03:06 +0000256; FMF-NEXT: xsmulsp 1, 1, 0
Sanjay Patele7532d22018-05-03 17:41:37 +0000257; FMF-NEXT: blr
258;
259; GLOBAL-LABEL: fmul_fma_fast2:
260; GLOBAL: # %bb.0:
261; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000262; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000263; GLOBAL-NEXT: xsmulsp 1, 1, 0
264; GLOBAL-NEXT: blr
; IR under test: fma(x, 7.0, x * 42.0) where both the inner fmul and the fma
; call are 'fast'.
; Both runs expect a single xsmulsp of x by one constant loaded through
; .LCPI9_0.
265 %mul = fmul fast float %x, 42.0
266 %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
267 ret float %fma
268}
269
270; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
271
272; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
Michael Bergcc1c4b62018-06-06 18:47:55 +0000273; FMFDEBUG: fmul afn {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000274; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
275
Sanjay Patel52151882018-05-03 18:49:35 +0000276; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
Michael Bergcc1c4b62018-06-06 18:47:55 +0000277; GLOBALDEBUG: fmul afn {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000278; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
279
Sanjay Patele7532d22018-05-03 17:41:37 +0000280define float @sqrt_afn(float %x) {
281; FMF-LABEL: sqrt_afn:
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000282; FMF: # %bb.0:
283; FMF-NEXT: xxlxor 0, 0, 0
284; FMF-NEXT: fcmpu 0, 1, 0
285; FMF-NEXT: beq 0, .LBB10_2
286; FMF-NEXT: # %bb.1:
287; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
288; FMF-NEXT: xsrsqrtesp 3, 1
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000289; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3)
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000290; FMF-NEXT: xsmulsp 2, 1, 0
291; FMF-NEXT: xsmulsp 4, 3, 3
292; FMF-NEXT: xssubsp 2, 2, 1
293; FMF-NEXT: xsmulsp 2, 2, 4
294; FMF-NEXT: xssubsp 0, 0, 2
295; FMF-NEXT: xsmulsp 0, 3, 0
296; FMF-NEXT: xsmulsp 0, 0, 1
297; FMF-NEXT: .LBB10_2:
298; FMF-NEXT: fmr 1, 0
299; FMF-NEXT: blr
Sanjay Patele7532d22018-05-03 17:41:37 +0000300;
301; GLOBAL-LABEL: sqrt_afn:
302; GLOBAL: # %bb.0:
303; GLOBAL-NEXT: xxlxor 0, 0, 0
304; GLOBAL-NEXT: fcmpu 0, 1, 0
305; GLOBAL-NEXT: beq 0, .LBB10_2
306; GLOBAL-NEXT: # %bb.1:
307; GLOBAL-NEXT: xsrsqrtesp 2, 1
Sanjay Patele7532d22018-05-03 17:41:37 +0000308; GLOBAL-NEXT: fneg 0, 1
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000309; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
Sanjay Patele7532d22018-05-03 17:41:37 +0000310; GLOBAL-NEXT: fmr 4, 1
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000311; GLOBAL-NEXT: lfs 3, .LCPI10_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000312; GLOBAL-NEXT: xsmaddasp 4, 0, 3
313; GLOBAL-NEXT: xsmulsp 0, 2, 2
314; GLOBAL-NEXT: xsmaddasp 3, 4, 0
315; GLOBAL-NEXT: xsmulsp 0, 2, 3
316; GLOBAL-NEXT: xsmulsp 0, 0, 1
317; GLOBAL-NEXT: .LBB10_2:
318; GLOBAL-NEXT: fmr 1, 0
319; GLOBAL-NEXT: blr
; IR under test: a call to llvm.sqrt.f32 carrying only 'afn'.
; Both runs expect a compare of the input against zero that branches past the
; estimate code, and otherwise an xsrsqrtesp estimate refined arithmetically.
; The two runs differ in the refinement: the FMF run expects separate
; xsmulsp/xssubsp steps, while the GLOBAL run expects xsmaddasp.
320 %rt = call afn float @llvm.sqrt.f32(float %x)
321 ret float %rt
322}
323
324; The call is now fully 'fast'. This implies that approximation is allowed.
325
326; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
Michael Bergcc1c4b62018-06-06 18:47:55 +0000327; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patele7532d22018-05-03 17:41:37 +0000328; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
329
Sanjay Patel52151882018-05-03 18:49:35 +0000330; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
Michael Bergcc1c4b62018-06-06 18:47:55 +0000331; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
Sanjay Patel52151882018-05-03 18:49:35 +0000332; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
333
Sanjay Patele7532d22018-05-03 17:41:37 +0000334define float @sqrt_fast(float %x) {
335; FMF-LABEL: sqrt_fast:
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000336; FMF: # %bb.0:
337; FMF-NEXT: xxlxor 0, 0, 0
338; FMF-NEXT: fcmpu 0, 1, 0
339; FMF-NEXT: beq 0, .LBB11_2
340; FMF-NEXT: # %bb.1:
341; FMF-NEXT: xsrsqrtesp 2, 1
342; FMF-NEXT: fneg 0, 1
343; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
344; FMF-NEXT: fmr 4, 1
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000345; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3)
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000346; FMF-NEXT: xsmaddasp 4, 0, 3
347; FMF-NEXT: xsmulsp 0, 2, 2
348; FMF-NEXT: xsmaddasp 3, 4, 0
349; FMF-NEXT: xsmulsp 0, 2, 3
350; FMF-NEXT: xsmulsp 0, 0, 1
351; FMF-NEXT: .LBB11_2:
352; FMF-NEXT: fmr 1, 0
353; FMF-NEXT: blr
Sanjay Patele7532d22018-05-03 17:41:37 +0000354;
355; GLOBAL-LABEL: sqrt_fast:
356; GLOBAL: # %bb.0:
357; GLOBAL-NEXT: xxlxor 0, 0, 0
358; GLOBAL-NEXT: fcmpu 0, 1, 0
359; GLOBAL-NEXT: beq 0, .LBB11_2
360; GLOBAL-NEXT: # %bb.1:
361; GLOBAL-NEXT: xsrsqrtesp 2, 1
Sanjay Patele7532d22018-05-03 17:41:37 +0000362; GLOBAL-NEXT: fneg 0, 1
Stefan Pintiliecb4f0c52018-07-04 18:54:25 +0000363; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
Sanjay Patele7532d22018-05-03 17:41:37 +0000364; GLOBAL-NEXT: fmr 4, 1
QingShan Zhangf8f9af72018-08-20 02:52:55 +0000365; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3)
Sanjay Patele7532d22018-05-03 17:41:37 +0000366; GLOBAL-NEXT: xsmaddasp 4, 0, 3
367; GLOBAL-NEXT: xsmulsp 0, 2, 2
368; GLOBAL-NEXT: xsmaddasp 3, 4, 0
369; GLOBAL-NEXT: xsmulsp 0, 2, 3
370; GLOBAL-NEXT: xsmulsp 0, 0, 1
371; GLOBAL-NEXT: .LBB11_2:
372; GLOBAL-NEXT: fmr 1, 0
373; GLOBAL-NEXT: blr
; IR under test: a call to llvm.sqrt.f32 that is fully 'fast'.
; Here the FMF run and the GLOBAL run expect the same sequence: a compare of
; the input against zero that branches past the estimate code, then an
; xsrsqrtesp estimate refined with xsmaddasp/xsmulsp.
374 %rt = call fast float @llvm.sqrt.f32(float %x)
375 ret float %rt
376}
377
Sanjay Patel4c8a67a2018-05-14 21:17:49 +0000378; fcmp can have fast-math-flags.
379
380; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
381; FMFDEBUG: select_cc {{t[0-9]+}}
382; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
383
384; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
385; GLOBALDEBUG: select_cc {{t[0-9]+}}
386; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
387
388define double @fcmp_nnan(double %a, double %y, double %z) {
389; FMF-LABEL: fcmp_nnan:
390; FMF: # %bb.0:
391; FMF-NEXT: xxlxor 0, 0, 0
Sanjay Patel8652c532018-05-15 14:16:24 +0000392; FMF-NEXT: xscmpudp 0, 1, 0
393; FMF-NEXT: blt 0, .LBB12_2
Sanjay Patel4c8a67a2018-05-14 21:17:49 +0000394; FMF-NEXT: # %bb.1:
395; FMF-NEXT: fmr 3, 2
396; FMF-NEXT: .LBB12_2:
397; FMF-NEXT: fmr 1, 3
398; FMF-NEXT: blr
399;
400; GLOBAL-LABEL: fcmp_nnan:
401; GLOBAL: # %bb.0:
402; GLOBAL-NEXT: xxlxor 0, 0, 0
403; GLOBAL-NEXT: xscmpudp 0, 1, 0
404; GLOBAL-NEXT: blt 0, .LBB12_2
405; GLOBAL-NEXT: # %bb.1:
406; GLOBAL-NEXT: fmr 3, 2
407; GLOBAL-NEXT: .LBB12_2:
408; GLOBAL-NEXT: fmr 1, 3
409; GLOBAL-NEXT: blr
; IR under test: 'fcmp nnan ult' of %a against 0.0 driving a select that
; yields %z when the compare is true and %y otherwise.
; Both runs expect one xscmpudp against a zeroed register and a single blt.
; NOTE(review): presumably the nnan flag is what lets the unordered half of
; 'ult' be dropped so that one branch suffices - confirm.
410 %cmp = fcmp nnan ult double %a, 0.0
411 %z.y = select i1 %cmp, double %z, double %y
412 ret double %z.y
413}
414
415; FP library calls can have fast-math-flags.
416
417; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
418; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
419; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
Sanjay Patel8652c532018-05-15 14:16:24 +0000420; FMFDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
Sanjay Patel4c8a67a2018-05-14 21:17:49 +0000421; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
422
423; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
424; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
425; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
Sanjay Patel8652c532018-05-15 14:16:24 +0000426; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
Sanjay Patel4c8a67a2018-05-14 21:17:49 +0000427; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
428
429declare double @log2(double)
430define double @log2_approx(double %x) nounwind {
431; FMF-LABEL: log2_approx:
432; FMF: # %bb.0:
433; FMF-NEXT: mflr 0
434; FMF-NEXT: std 0, 16(1)
435; FMF-NEXT: stdu 1, -32(1)
436; FMF-NEXT: bl log2
437; FMF-NEXT: nop
438; FMF-NEXT: addi 1, 1, 32
439; FMF-NEXT: ld 0, 16(1)
440; FMF-NEXT: mtlr 0
441; FMF-NEXT: blr
442;
443; GLOBAL-LABEL: log2_approx:
444; GLOBAL: # %bb.0:
445; GLOBAL-NEXT: mflr 0
446; GLOBAL-NEXT: std 0, 16(1)
447; GLOBAL-NEXT: stdu 1, -32(1)
448; GLOBAL-NEXT: bl log2
449; GLOBAL-NEXT: nop
450; GLOBAL-NEXT: addi 1, 1, 32
451; GLOBAL-NEXT: ld 0, 16(1)
452; GLOBAL-NEXT: mtlr 0
453; GLOBAL-NEXT: blr
; IR under test: a call to the external function @log2 carrying 'afn'.
; The expected assembly is the same in both runs: an ordinary 'bl log2' with
; the link register saved before the call and restored after it. The flag is
; only visible in the debug-output checks that precede this function.
454 %r = call afn double @log2(double %x)
455 ret double %r
456}
457
Michael Berg8f6d6c82018-06-05 18:12:25 +0000458; -(X - Y) --> (Y - X)
459
460; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
Michael Berg96925fe2018-06-05 18:49:47 +0000461; FMFDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
Michael Berg8f6d6c82018-06-05 18:12:25 +0000462; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
463
464; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
Michael Berg96925fe2018-06-05 18:49:47 +0000465; GLOBALDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
Michael Berg8f6d6c82018-06-05 18:12:25 +0000466; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
467
468define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
469; FMF-LABEL: fneg_fsub_nozeros_1:
470; FMF: # %bb.0:
Michael Berg96925fe2018-06-05 18:49:47 +0000471; FMF-NEXT: xssubsp 1, 2, 1
Michael Berg8f6d6c82018-06-05 18:12:25 +0000472; FMF-NEXT: blr
473;
474; GLOBAL-LABEL: fneg_fsub_nozeros_1:
475; GLOBAL: # %bb.0:
Michael Berg96925fe2018-06-05 18:49:47 +0000476; GLOBAL-NEXT: xssubsp 1, 2, 1
Michael Berg8f6d6c82018-06-05 18:12:25 +0000477; GLOBAL-NEXT: blr
; IR under test: (x - y) negated via 'fsub nsz 0.0, ...'; only the outer fsub
; carries 'nsz'. Both runs expect the pair to fold into a single xssubsp with
; operands 2 and 1 swapped relative to the source order.
; Despite their names, %neg holds x - y and %add holds the negated result.
; The %z parameter is not used.
478 %neg = fsub float %x, %y
Michael Berg96925fe2018-06-05 18:49:47 +0000479 %add = fsub nsz float 0.0, %neg
Michael Berg8f6d6c82018-06-05 18:12:25 +0000480 ret float %add
481}
Michael Berg96925fe2018-06-05 18:49:47 +0000482