Implement cpu_dispatch/cpu_specific Multiversioning
As documented here: https://software.intel.com/en-us/node/682969 and
https://software.intel.com/en-us/node/523346. cpu_dispatch multiversioning
is an ICC feature that provides for function multiversioning.
This feature is implemented with two attributes: First, cpu_specific,
which specifies the individual function versions. Second, cpu_dispatch,
which specifies the location of the resolver function and the list of
resolvable functions.
This is valuable since it provides a mechanism where the resolver's TU
can be specified in one location, and the individual implementions
each in their own translation units.
The goal of this patch is to be source-compatible with ICC, so this
implementation diverges from the ICC implementation in a few ways:
1- Linux x86/64 only: This implementation uses ifuncs in order to
properly dispatch functions. This is is a valuable performance benefit
over the ICC implementation. A future patch will be provided to enable
this feature on Windows, but it will obviously more closely fit ICC's
implementation.
2- CPU Identification functions: ICC uses a set of custom functions to identify
the feature list of the host processor. This patch uses the cpu_supports
functionality in order to better align with 'target' multiversioning.
1- cpu_dispatch function def/decl: ICC's cpu_dispatch requires that the function
marked cpu_dispatch be an empty definition. This patch supports that as well,
however declarations are also permitted, since the linker will solve the
issue of multiple emissions.
Differential Revision: https://reviews.llvm.org/D47474
llvm-svn: 337552
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6886870..627a33d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -861,22 +861,38 @@
GV->setThreadLocalMode(TLM);
}
+static std::string getCPUSpecificMangling(const CodeGenModule &CGM,
+ StringRef Name) {
+ const TargetInfo &Target = CGM.getTarget();
+ return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str();
+}
+
+static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
+ const CPUSpecificAttr *Attr,
+ raw_ostream &Out) {
+ // cpu_specific gets the current name, dispatch gets the resolver.
+ if (Attr)
+ Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
+ else
+ Out << ".resolver";
+}
+
static void AppendTargetMangling(const CodeGenModule &CGM,
const TargetAttr *Attr, raw_ostream &Out) {
if (Attr->isDefaultVersion())
return;
Out << '.';
- const auto &Target = CGM.getTarget();
+ const TargetInfo &Target = CGM.getTarget();
TargetAttr::ParsedTargetAttr Info =
Attr->parse([&Target](StringRef LHS, StringRef RHS) {
- // Multiversioning doesn't allow "no-${feature}", so we can
- // only have "+" prefixes here.
- assert(LHS.startswith("+") && RHS.startswith("+") &&
- "Features should always have a prefix.");
- return Target.multiVersionSortPriority(LHS.substr(1)) >
- Target.multiVersionSortPriority(RHS.substr(1));
- });
+ // Multiversioning doesn't allow "no-${feature}", so we can
+ // only have "+" prefixes here.
+ assert(LHS.startswith("+") && RHS.startswith("+") &&
+ "Features should always have a prefix.");
+ return Target.multiVersionSortPriority(LHS.substr(1)) >
+ Target.multiVersionSortPriority(RHS.substr(1));
+ });
bool IsFirst = true;
@@ -895,7 +911,7 @@
static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
- bool OmitTargetMangling = false) {
+ bool OmitMultiVersionMangling = false) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
MangleContext &MC = CGM.getCXXABI().getMangleContext();
@@ -922,8 +938,14 @@
}
if (const auto *FD = dyn_cast<FunctionDecl>(ND))
- if (FD->isMultiVersion() && !OmitTargetMangling)
- AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
+ if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
+ AppendCPUSpecificCPUDispatchMangling(
+ CGM, FD->getAttr<CPUSpecificAttr>(), Out);
+ else
+ AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+ }
+
return Out.str();
}
@@ -936,7 +958,7 @@
// allows us to lookup the version that was emitted when this wasn't a
// multiversion function.
std::string NonTargetName =
- getMangledNameImpl(*this, GD, FD, /*OmitTargetMangling=*/true);
+ getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
GlobalDecl OtherGD;
if (lookupRepresentativeDecl(NonTargetName, OtherGD)) {
assert(OtherGD.getCanonicalDecl()
@@ -979,11 +1001,30 @@
}
}
+ const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+ // Since CPUSpecific can require multiple emits per decl, store the manglings
+ // separately.
+ if (FD &&
+ (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
+ const auto *SD = FD->getAttr<CPUSpecificAttr>();
+
+ std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
+ CanonicalGD,
+ SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
+
+ auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
+ if (FoundName != CPUSpecificMangledDeclNames.end())
+ return FoundName->second;
+
+ auto Result = CPUSpecificManglings.insert(
+ std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD));
+ return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first();
+ }
+
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
-
// Keep the first result in the case of a mangling collision.
const auto *ND = cast<NamedDecl>(GD.getDecl());
auto Result =
@@ -1321,8 +1362,9 @@
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
+ const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
bool AddedAttr = false;
- if (TD) {
+ if (TD || SD) {
llvm::StringMap<bool> FeatureMap;
getFunctionFeatureMap(FeatureMap, FD);
@@ -1334,10 +1376,12 @@
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
- TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
- if (ParsedAttr.Architecture != "" &&
- getTarget().isValidCPUName(ParsedAttr.Architecture))
- TargetCPU = ParsedAttr.Architecture;
+ if (TD) {
+ TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
+ if (ParsedAttr.Architecture != "" &&
+ getTarget().isValidCPUName(ParsedAttr.Architecture))
+ TargetCPU = ParsedAttr.Architecture;
+ }
} else {
// Otherwise just add the existing target cpu and target features to the
// function.
@@ -2037,6 +2081,10 @@
if (Global->hasAttr<IFuncAttr>())
return emitIFuncDefinition(GD);
+ // If this is a cpu_dispatch multiversion function, emit the resolver.
+ if (Global->hasAttr<CPUDispatchAttr>())
+ return emitCPUDispatchDefinition(GD);
+
// If this is CUDA, be selective about which declarations we emit.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
@@ -2355,7 +2403,7 @@
void CodeGenModule::emitMultiVersionFunctions() {
for (GlobalDecl GD : MultiVersionFuncs) {
- SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
+ SmallVector<CodeGenFunction::TargetMultiVersionResolverOption, 10> Options;
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
getContext().forEachMultiversionedFunctionVersion(
FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
@@ -2387,28 +2435,75 @@
getModule().getOrInsertComdat(ResolverFunc->getName()));
std::stable_sort(
Options.begin(), Options.end(),
- std::greater<CodeGenFunction::MultiVersionResolverOption>());
+ std::greater<CodeGenFunction::TargetMultiVersionResolverOption>());
CodeGenFunction CGF(*this);
- CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+ CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options);
}
}
+void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ assert(FD && "Not a FunctionDecl?");
+ const auto *DD = FD->getAttr<CPUDispatchAttr>();
+ assert(DD && "Not a cpu_dispatch Function?");
+ llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
+
+ StringRef ResolverName = getMangledName(GD);
+ llvm::Type *ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(DeclTy,
+ Context.getTargetAddressSpace(FD->getType())),
+ false);
+ auto *ResolverFunc = cast<llvm::Function>(
+ GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
+ /*ForVTable=*/false));
+
+ SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10>
+ Options;
+ const TargetInfo &Target = getTarget();
+ for (const IdentifierInfo *II : DD->cpus()) {
+ // Get the name of the target function so we can look it up/create it.
+ std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
+ getCPUSpecificMangling(*this, II->getName());
+ llvm::Constant *Func = GetOrCreateLLVMFunction(
+ MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ llvm::SmallVector<StringRef, 32> Features;
+ Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
+ llvm::transform(Features, Features.begin(),
+ [](StringRef Str) { return Str.substr(1); });
+ Features.erase(std::remove_if(
+ Features.begin(), Features.end(), [&Target](StringRef Feat) {
+ return !Target.validateCpuSupports(Feat);
+ }), Features.end());
+ Options.emplace_back(cast<llvm::Function>(Func),
+ CodeGenFunction::GetX86CpuSupportsMask(Features));
+ }
+
+ llvm::sort(
+ Options.begin(), Options.end(),
+ std::greater<CodeGenFunction::CPUDispatchMultiVersionResolverOption>());
+ CodeGenFunction CGF(*this);
+ CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options);
+}
+
/// If an ifunc for the specified mangled name is not in the module, create and
/// return an llvm IFunc Function with the specified type.
llvm::Constant *
CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
- StringRef MangledName,
const FunctionDecl *FD) {
- std::string IFuncName = (MangledName + ".ifunc").str();
+ std::string MangledName =
+ getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ std::string IFuncName = MangledName + ".ifunc";
if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
return IFuncGV;
// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
- // replaced later.
- MultiVersionFuncs.push_back(GD);
+ // replaced later if necessary (target multiversioning only).
+ if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
+ MultiVersionFuncs.push_back(GD);
- std::string ResolverName = (MangledName + ".resolver").str();
+ std::string ResolverName = MangledName + ".resolver";
llvm::Type *ResolverType = llvm::FunctionType::get(
llvm::PointerType::get(DeclTy,
Context.getTargetAddressSpace(FD->getType())),
@@ -2455,10 +2550,12 @@
addDeferredDeclToEmit(GDDef);
}
- if (FD->isMultiVersion() && FD->getAttr<TargetAttr>()->isDefaultVersion()) {
- UpdateMultiVersionNames(GD, FD);
+ if (FD->isMultiVersion()) {
+ const auto *TA = FD->getAttr<TargetAttr>();
+ if (TA && TA->isDefaultVersion())
+ UpdateMultiVersionNames(GD, FD);
if (!IsForDefinition)
- return GetOrCreateMultiVersionIFunc(GD, Ty, MangledName, FD);
+ return GetOrCreateMultiVersionIFunc(GD, Ty, FD);
}
}
@@ -3727,6 +3824,15 @@
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
+
+ if (D->isCPUSpecificMultiVersion()) {
+ auto *Spec = D->getAttr<CPUSpecificAttr>();
+ // If there is another specific version we need to emit, do so here.
+ if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) {
+ ++Spec->ActiveArgIndex;
+ EmitGlobalFunctionDefinition(GD, nullptr);
+ }
+ }
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -5107,6 +5213,12 @@
// the attribute.
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
ParsedAttr.Features);
+ } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
+ llvm::SmallVector<StringRef, 32> FeaturesTmp;
+ Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
+ FeaturesTmp);
+ std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
+ Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
} else {
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
Target.getTargetOpts().Features);