AMDGPU: Use a custom areInlineCompatible
Fixes not inlining OpenCL library functions on AMDGPU,
which don't have an explicitly set target-cpu.
llvm-svn: 310269
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 89a0390..fde2132 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -534,3 +534,16 @@
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
+
+/// Inlining is allowed when every feature bit of \p Callee that is not in
+/// InlineFeatureIgnoreList is also set for \p Caller.
+bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
+                                        const Function *Callee) const {
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+  const FeatureBitset CallerFeatures =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits() & ~InlineFeatureIgnoreList;
+  const FeatureBitset CalleeFeatures =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits() & ~InlineFeatureIgnoreList;
+  // Subset test: intersecting with the caller must leave the callee unchanged.
+  return (CallerFeatures & CalleeFeatures) == CalleeFeatures;
+}