Use rsqrt (X86) to speed up reciprocal square root calcs. This is a first step for generating SSE rsqrt instructions for reciprocal square root calcs when fast-math is allowed. For now, be conservative and only enable this for AMD btver2, where performance improves significantly - for example, 29% on llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c (if we convert the data type to single-precision float). This patch adds a two-constant version of the Newton-Raphson refinement algorithm to DAGCombiner that can be selected by any target via a parameter returned by getRsqrtEstimate(). See PR20900 for more details: http://llvm.org/bugs/show_bug.cgi?id=20900 Differential Revision: http://reviews.llvm.org/D5658 llvm-svn: 220570

commit: 957efc23bb87d341a1b478d87a48bb888c2d4068 [log] [tgz]
author: Sanjay Patel <spatel@rotateright.com> Fri Oct 24 17:02:16 2014 +0000
committer: Sanjay Patel <spatel@rotateright.com> Fri Oct 24 17:02:16 2014 +0000
tree: 48ae584987b7970cb90899c03590938f4d622799
parent: 5e3a421bfcb891fc7821daa501e30c113fb1bf16 [diff] [blame]
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 24ee553..cdecf2a 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h

@@ -192,6 +192,11 @@
   /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
   bool SlowIncDec;
 
+  /// Use the RSQRT* instructions to optimize square root calculations.
+  /// For this to be profitable, the cost of FSQRT and FDIV must be
+  /// substantially higher than normal FP ops like FADD and FMUL.
+  bool UseSqrtEst;
+
   /// Processor has AVX-512 PreFetch Instructions
   bool HasPFI;
 
@@ -369,6 +374,7 @@
   bool LEAusesAG() const { return LEAUsesAG; }
   bool slowLEA() const { return SlowLEA; }
   bool slowIncDec() const { return SlowIncDec; }
+  bool useSqrtEst() const { return UseSqrtEst; }
   bool hasCDI() const { return HasCDI; }
   bool hasPFI() const { return HasPFI; }
   bool hasERI() const { return HasERI; }
commit	957efc23bb87d341a1b478d87a48bb888c2d4068	[log] [tgz]
author	Sanjay Patel <spatel@rotateright.com>	Fri Oct 24 17:02:16 2014 +0000
committer	Sanjay Patel <spatel@rotateright.com>	Fri Oct 24 17:02:16 2014 +0000
tree	48ae584987b7970cb90899c03590938f4d622799
parent	5e3a421bfcb891fc7821daa501e30c113fb1bf16 [diff] [blame]