[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules.
Summary:
Now when you ask clang to link in a bitcode module, you can tell it to
set attributes on that module's functions to match what we would have
set if we'd emitted those functions ourselves.
This is particularly important for fast-math attributes in CUDA
compilations.
Each CUDA compilation links in libdevice, a bitcode library provided by
nvidia as part of the CUDA distribution. Without this patch, if we have
a user-function F that is compiled with -ffast-math that calls a
function G from libdevice, F will have the unsafe-fp-math=true (etc.)
attributes, but G will have no attributes.
Since F calls G, the inliner will merge G's attributes into F's. It
considers the lack of an unsafe-fp-math=true attribute on G to be
tantamount to unsafe-fp-math=false, so it "merges" these by setting
unsafe-fp-math=false on F.
This then continues up the call graph, until every function that
(transitively) calls something in libdevice gets unsafe-fp-math=false
set, thus disabling fastmath in almost all CUDA code.
Reviewers: echristo
Subscribers: hfinkel, llvm-commits, mehdi_amini
Differential Revision: https://reviews.llvm.org/D28538
llvm-svn: 293097
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index c7c61e0..7d3419b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1620,15 +1620,113 @@
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
+void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
+ // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
+ if (!HasOptnone) {
+ if (CodeGenOpts.OptimizeSize)
+ FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
+ if (CodeGenOpts.OptimizeSize == 2)
+ FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+ }
+
+ if (CodeGenOpts.DisableRedZone)
+ FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.NoImplicitFloat)
+ FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
+
+ if (AttrOnCallSite) {
+ // Attributes that should go on the call site only.
+ if (!CodeGenOpts.SimplifyLibCalls ||
+ CodeGenOpts.isNoBuiltinFunc(Name.data()))
+ FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
+ if (!CodeGenOpts.TrapFuncName.empty())
+ FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
+ } else {
+ // Attributes that should go on the function, but not the call site.
+ if (!CodeGenOpts.DisableFPElim) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ } else if (CodeGenOpts.OmitLeafFramePointer) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ } else {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ }
+
+ FuncAttrs.addAttribute("less-precise-fpmad",
+ llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+
+ if (!CodeGenOpts.FPDenormalMode.empty())
+ FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
+
+ FuncAttrs.addAttribute("no-trapping-math",
+ llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+
+ // TODO: Are these all needed?
+ // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
+ FuncAttrs.addAttribute("no-infs-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
+ FuncAttrs.addAttribute("no-nans-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
+ FuncAttrs.addAttribute("unsafe-fp-math",
+ llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
+ FuncAttrs.addAttribute("use-soft-float",
+ llvm::toStringRef(CodeGenOpts.SoftFloat));
+ FuncAttrs.addAttribute("stack-protector-buffer-size",
+ llvm::utostr(CodeGenOpts.SSPBufferSize));
+ FuncAttrs.addAttribute("no-signed-zeros-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+ FuncAttrs.addAttribute(
+ "correctly-rounded-divide-sqrt-fp-math",
+ llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+
+ // TODO: Reciprocal estimate codegen options should apply to instructions?
+ std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
+ if (!Recips.empty())
+ FuncAttrs.addAttribute("reciprocal-estimates",
+ llvm::join(Recips.begin(), Recips.end(), ","));
+
+ if (CodeGenOpts.StackRealignment)
+ FuncAttrs.addAttribute("stackrealign");
+ if (CodeGenOpts.Backchain)
+ FuncAttrs.addAttribute("backchain");
+ }
+
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions and calls in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as
+ // __syncthreads(), and so can't have certain optimizations applied around
+ // them). LLVM will remove this attribute where it safely can.
+ FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+ // Exceptions aren't supported in CUDA device code.
+ FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+ // Respect -fcuda-flush-denormals-to-zero.
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ FuncAttrs.addAttribute("nvptx-f32ftz", "true");
+ }
+}
+
+void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
+ llvm::AttrBuilder FuncAttrs;
+ ConstructDefaultFnAttrList(F.getName(),
+ F.hasFnAttribute(llvm::Attribute::OptimizeNone),
+ /* AttrOnCallsite = */ false, FuncAttrs);
+ llvm::AttributeSet AS = llvm::AttributeSet::get(
+ getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs);
+ F.addAttributes(llvm::AttributeSet::FunctionIndex, AS);
+}
+
void CodeGenModule::ConstructAttributeList(
StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) {
llvm::AttrBuilder FuncAttrs;
llvm::AttrBuilder RetAttrs;
- bool HasOptnone = false;
CallingConv = FI.getEffectiveCallingConvention();
-
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
@@ -1639,7 +1737,7 @@
const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
- bool HasAnyX86InterruptAttr = false;
+ bool HasOptnone = false;
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1679,7 +1777,6 @@
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
RetAttrs.addAttribute(llvm::Attribute::NonNull);
- HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Optional<unsigned> NumElemsParam;
@@ -1691,86 +1788,19 @@
}
}
- // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
- if (!HasOptnone) {
- if (CodeGenOpts.OptimizeSize)
- FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
- if (CodeGenOpts.OptimizeSize == 2)
- FuncAttrs.addAttribute(llvm::Attribute::MinSize);
- }
+ ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
- if (CodeGenOpts.DisableRedZone)
- FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
- if (CodeGenOpts.NoImplicitFloat)
- FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
if (CodeGenOpts.EnableSegmentedStacks &&
!(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
FuncAttrs.addAttribute("split-stack");
- if (AttrOnCallSite) {
- // Attributes that should go on the call site only.
- if (!CodeGenOpts.SimplifyLibCalls ||
- CodeGenOpts.isNoBuiltinFunc(Name.data()))
- FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
- if (!CodeGenOpts.TrapFuncName.empty())
- FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
- } else {
- // Attributes that should go on the function, but not the call site.
- if (!CodeGenOpts.DisableFPElim) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- } else if (CodeGenOpts.OmitLeafFramePointer) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- } else {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- }
-
+ if (!AttrOnCallSite) {
bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
- (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
- FuncAttrs.addAttribute(
- "disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
-
- FuncAttrs.addAttribute("less-precise-fpmad",
- llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
-
- if (!CodeGenOpts.FPDenormalMode.empty())
- FuncAttrs.addAttribute("denormal-fp-math",
- CodeGenOpts.FPDenormalMode);
-
- FuncAttrs.addAttribute("no-trapping-math",
- llvm::toStringRef(CodeGenOpts.NoTrappingMath));
-
- // TODO: Are these all needed?
- // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
- FuncAttrs.addAttribute("no-infs-fp-math",
- llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
- FuncAttrs.addAttribute("no-nans-fp-math",
- llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
- FuncAttrs.addAttribute("unsafe-fp-math",
- llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
- FuncAttrs.addAttribute("use-soft-float",
- llvm::toStringRef(CodeGenOpts.SoftFloat));
- FuncAttrs.addAttribute("stack-protector-buffer-size",
- llvm::utostr(CodeGenOpts.SSPBufferSize));
- FuncAttrs.addAttribute("no-signed-zeros-fp-math",
- llvm::toStringRef(CodeGenOpts.NoSignedZeros));
- FuncAttrs.addAttribute(
- "correctly-rounded-divide-sqrt-fp-math",
- llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
-
- // TODO: Reciprocal estimate codegen options should apply to instructions?
- std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
- if (!Recips.empty())
- FuncAttrs.addAttribute("reciprocal-estimates",
- llvm::join(Recips.begin(), Recips.end(), ","));
-
- if (CodeGenOpts.StackRealignment)
- FuncAttrs.addAttribute("stackrealign");
- if (CodeGenOpts.Backchain)
- FuncAttrs.addAttribute("backchain");
+ CodeGenOpts.DisableTailCalls ||
+ (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+ TargetDecl->hasAttr<AnyX86InterruptAttr>()));
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
@@ -1819,21 +1849,6 @@
}
}
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
- // Conservatively, mark all functions and calls in CUDA as convergent
- // (meaning, they may call an intrinsically convergent op, such as
- // __syncthreads(), and so can't have certain optimizations applied around
- // them). LLVM will remove this attribute where it safely can.
- FuncAttrs.addAttribute(llvm::Attribute::Convergent);
-
- // Exceptions aren't supported in CUDA device code.
- FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
-
- // Respect -fcuda-flush-denormals-to-zero.
- if (getLangOpts().CUDADeviceFlushDenormalsToZero)
- FuncAttrs.addAttribute("nvptx-f32ftz", "true");
- }
-
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 0daedf4..527d90e 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,8 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/CodeGen/CodeGenAction.h"
+#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
@@ -16,7 +18,6 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
-#include "clang/CodeGen/CodeGenAction.h"
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
@@ -41,6 +42,8 @@
namespace clang {
class BackendConsumer : public ASTConsumer {
+ using LinkModule = CodeGenAction::LinkModule;
+
virtual void anchor();
DiagnosticsEngine &Diags;
BackendAction Action;
@@ -61,43 +64,37 @@
std::unique_ptr<CodeGenerator> Gen;
- SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4>
- LinkModules;
+ SmallVector<LinkModule, 4> LinkModules;
// This is here so that the diagnostic printer knows the module a diagnostic
// refers to.
llvm::Module *CurLinkModule = nullptr;
public:
- BackendConsumer(
- BackendAction Action, DiagnosticsEngine &Diags,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- bool TimePasses, const std::string &InFile,
- const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr)
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts, bool TimePasses,
+ const std::string &InFile,
+ SmallVector<LinkModule, 4> LinkModules,
+ std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, CoverageInfo)) {
+ CodeGenOpts, C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)) {
llvm::TimePassesIsEnabled = TimePasses;
- for (auto &I : LinkModules)
- this->LinkModules.push_back(
- std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second)));
}
llvm::Module *getModule() const { return Gen->GetModule(); }
std::unique_ptr<llvm::Module> takeModule() {
return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
}
- void releaseLinkModules() {
- for (auto &I : LinkModules)
- I.second.release();
- }
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
Gen->HandleCXXStaticMemberVarInstantiation(VD);
@@ -159,6 +156,21 @@
HandleTopLevelDecl(D);
}
+ // Links each entry in LinkModules into our module. Returns true on error.
+ bool LinkInModules() {
+ for (auto &LM : LinkModules) {
+ if (LM.PropagateAttrs)
+ for (Function &F : *LM.Module)
+ Gen->CGM().AddDefaultFnAttrs(F);
+
+ CurLinkModule = LM.Module.get();
+ if (Linker::linkModules(*getModule(), std::move(LM.Module),
+ LM.LinkFlags))
+ return true;
+ }
+ return false; // success
+ }
+
void HandleTranslationUnit(ASTContext &C) override {
{
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
@@ -216,13 +228,9 @@
Ctx.setDiagnosticHotnessRequested(true);
}
- // Link LinkModule into this module if present, preserving its validity.
- for (auto &I : LinkModules) {
- unsigned LinkFlags = I.first;
- CurLinkModule = I.second.get();
- if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags))
- return;
- }
+ // Link each LinkModule into our module.
+ if (LinkInModules())
+ return;
EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
@@ -729,10 +737,6 @@
if (!getCompilerInstance().hasASTConsumer())
return;
- // Take back ownership of link modules we passed to consumer.
- if (!LinkModules.empty())
- BEConsumer->releaseLinkModules();
-
// Steal the module from the consumer.
TheModule = BEConsumer->takeModule();
}
@@ -775,13 +779,12 @@
// Load bitcode modules to link with, if we need to.
if (LinkModules.empty())
- for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) {
- const std::string &LinkBCFile = I.second;
-
- auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile);
+ for (const CodeGenOptions::BitcodeFileToLink &F :
+ CI.getCodeGenOpts().LinkBitcodeFiles) {
+ auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
if (!BCBuf) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << BCBuf.getError().message();
+ << F.Filename << BCBuf.getError().message();
LinkModules.clear();
return nullptr;
}
@@ -791,12 +794,13 @@
if (!ModuleOrErr) {
handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << EIB.message();
+ << F.Filename << EIB.message();
});
LinkModules.clear();
return nullptr;
}
- addLinkModule(ModuleOrErr.get().release(), I.first);
+ LinkModules.push_back(
+ {std::move(ModuleOrErr.get()), F.PropagateAttrs, F.LinkFlags});
}
CoverageSourceInfo *CoverageInfo = nullptr;
@@ -810,8 +814,8 @@
std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules,
- std::move(OS), *VMContext, CoverageInfo));
+ CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+ std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
return std::move(Result);
}
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 1715b3d..613e300 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1022,6 +1022,25 @@
CGCalleeInfo CalleeInfo, AttributeListType &PAL,
unsigned &CallingConv, bool AttrOnCallSite);
+ /// Adds attributes to F according to our CodeGenOptions and LangOptions, as
+ /// though we had emitted it ourselves. We remove any attributes on F that
+ /// conflict with the attributes we add here.
+ ///
+ /// This is useful for adding attrs to bitcode modules that you want to link
+ /// with but don't control, such as CUDA's libdevice. When linking with such
+ /// a bitcode library, you might want to set e.g. its functions'
+ /// "unsafe-fp-math" attribute to match the attr of the functions you're
+ /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
+ /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
+ /// will propagate unsafe-fp-math=false up to every transitive caller of a
+ /// function in the bitcode library!
+ ///
+ /// With the exception of fast-math attrs, this will only make the attributes
+ /// on the function more conservative. But it's unsafe to call this on a
+ /// function which relies on particular fast-math attributes for correctness.
+ /// It's up to you to ensure that this is safe.
+ void AddDefaultFnAttrs(llvm::Function &F);
+
// Fills in the supplied string map with the set of target features for the
// passed in function.
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
@@ -1303,6 +1322,12 @@
/// Check whether we can use a "simpler", more core exceptions personality
/// function.
void SimplifyPersonality();
+
+ /// Helper function for ConstructAttributeList and AddDefaultFnAttrs.
+ /// Constructs an AttrList for a function with the given properties.
+ void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs);
};
} // end namespace CodeGen
} // end namespace clang