rdar://12100355 (part 1)
This revision attempts to recognize following population-count pattern:
while(a) { c++; ... ; a &= a - 1; ... },
where <c> and <a>could be used multiple times in the loop body.
TODO: On X8664 and ARM, __buildin_ctpop() are not expanded to a efficent
instruction sequence, which need to be improved in the following commits.
Reviewed by Nadav, really appreciate!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168931 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d1552cd..d8e7263 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17670,6 +17670,17 @@
return -1;
}
+ScalarTargetTransformInfo::PopcntHwSupport
+X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+ // call ST.hasSSE3() instead of ST.hasSSE4().
+ return ST.hasSSE41() ? Fast : None;
+}
+
unsigned
X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
Type *Ty) const {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 465c603..0673368 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -933,6 +933,14 @@
const TargetLibraryInfo *libInfo);
}
+ class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
+ public:
+ explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
+ ScalarTargetTransformImpl(TL) {};
+
+ virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+ };
+
class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
public:
explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 12311a1..3f60c95 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -118,7 +118,7 @@
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
+ X86ScalarTargetTransformImpl STTI;
X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,