Improve code generation for thread_local variables:
Summary:
* Don't bother using a thread wrapper when the variable is known to
have constant initialization.
* Emit the thread wrapper as discardable-if-unused in TUs that don't
contain a definition of the thread_local variable.
* Don't emit the thread wrapper at all if the thread_local variable
is unused and discardable; it will be emitted by all TUs that need
it.
Reviewers: rjmccall, jdoerfert
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67429
llvm-svn: 371767
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 3a9c3b3..bff49be 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -577,7 +577,7 @@
// Determine if references to thread_local global variables can be made
// directly or require access through a thread wrapper function.
- virtual bool usesThreadWrapperFunction() const = 0;
+ virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0;
/// Emit a reference to a non-local thread_local variable (including
/// triggering the initialization of all thread_local variables in its
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index cf6dfbe..9289db9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2361,7 +2361,7 @@
// If it's thread_local, emit a call to its wrapper function instead.
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
- CGF.CGM.getCXXABI().usesThreadWrapperFunction())
+ CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD))
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
// Check if the variable is marked as declare target with link clause in
// device codegen.
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 29ec2d4..0371853 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -43,6 +43,10 @@
/// VTables - All the vtables which have been defined.
llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables;
+ /// All the thread wrapper functions that have been used.
+ llvm::SmallVector<std::pair<const VarDecl *, llvm::Function *>, 8>
+ ThreadWrappers;
+
protected:
bool UseARMMethodPtrABI;
bool UseARMGuardVarABI;
@@ -322,7 +326,42 @@
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
- bool usesThreadWrapperFunction() const override { return true; }
+ /// Determine whether we will definitely emit this variable with a constant
+ /// initializer, either because the language semantics demand it or because
+ /// we know that the initializer is a constant.
+ bool isEmittedWithConstantInitializer(const VarDecl *VD) const {
+ VD = VD->getMostRecentDecl();
+ if (VD->hasAttr<ConstInitAttr>())
+ return true;
+
+ // All later checks examine the initializer specified on the variable. If
+ // the variable is weak, such examination would not be correct.
+ if (VD->isWeak() || VD->hasAttr<SelectAnyAttr>())
+ return false;
+
+ const VarDecl *InitDecl = VD->getInitializingDeclaration();
+ if (!InitDecl)
+ return false;
+
+ // If there's no initializer to run, this is constant initialization.
+ if (!InitDecl->hasInit())
+ return true;
+
+ // If we have the only definition, we don't need a thread wrapper if we
+ // will emit the value as a constant.
+ if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD)))
+ return !VD->getType().isDestructedType() && InitDecl->evaluateValue();
+
+ // Otherwise, we need a thread wrapper unless we know that every
+ // translation unit will emit the value as a constant. We rely on
+ // ICE-ness not varying between translation units, which isn't actually
+ // guaranteed by the standard but is necessary for sanity.
+ return InitDecl->isInitKnownICE() && InitDecl->isInitICE();
+ }
+
+ bool usesThreadWrapperFunction(const VarDecl *VD) const override {
+ return !isEmittedWithConstantInitializer(VD);
+ }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
@@ -2456,9 +2495,6 @@
CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper);
- if (VD->hasDefinition())
- CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
-
// Always resolve references to the wrapper at link time.
if (!Wrapper->hasLocalLinkage())
if (!isThreadWrapperReplaceable(VD, CGM) ||
@@ -2471,6 +2507,8 @@
Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
}
+
+ ThreadWrappers.push_back({VD, Wrapper});
return Wrapper;
}
@@ -2519,20 +2557,40 @@
}
}
- // Emit thread wrappers.
+ // Create declarations for thread wrappers for all thread-local variables
+ // with non-discardable definitions in this translation unit.
for (const VarDecl *VD : CXXThreadLocals) {
+ if (VD->hasDefinition() &&
+ !isDiscardableGVALinkage(getContext().GetGVALinkageForVariable(VD))) {
+ llvm::GlobalValue *GV = CGM.GetGlobalValue(CGM.getMangledName(VD));
+ getOrCreateThreadLocalWrapper(VD, GV);
+ }
+ }
+
+ // Emit all referenced thread wrappers.
+ for (auto VDAndWrapper : ThreadWrappers) {
+ const VarDecl *VD = VDAndWrapper.first;
llvm::GlobalVariable *Var =
cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD)));
- llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var);
+ llvm::Function *Wrapper = VDAndWrapper.second;
// Some targets require that all access to thread local variables go through
// the thread wrapper. This means that we cannot attempt to create a thread
// wrapper or a thread helper.
- if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) {
- Wrapper->setLinkage(llvm::Function::ExternalLinkage);
- continue;
+ if (!VD->hasDefinition()) {
+ if (isThreadWrapperReplaceable(VD, CGM)) {
+ Wrapper->setLinkage(llvm::Function::ExternalLinkage);
+ continue;
+ }
+
+ // If this isn't a TU in which this variable is defined, the thread
+ // wrapper is discardable.
+ if (Wrapper->getLinkage() == llvm::Function::WeakODRLinkage)
+ Wrapper->setLinkage(llvm::Function::LinkOnceODRLinkage);
}
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
+
// Mangle the name for the thread_local initialization function.
SmallString<256> InitFnName;
{
@@ -2547,7 +2605,10 @@
// produce a declaration of the initialization function.
llvm::GlobalValue *Init = nullptr;
bool InitIsInitFunc = false;
- if (VD->hasDefinition()) {
+ bool HasConstantInitialization = false;
+ if (isEmittedWithConstantInitializer(VD)) {
+ HasConstantInitialization = true;
+ } else if (VD->hasDefinition()) {
InitIsInitFunc = true;
llvm::Function *InitFuncToUse = InitFunc;
if (isTemplateInstantiation(VD->getTemplateSpecializationKind()))
@@ -2576,7 +2637,9 @@
llvm::LLVMContext &Context = CGM.getModule().getContext();
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
CGBuilderTy Builder(CGM, Entry);
- if (InitIsInitFunc) {
+ if (HasConstantInitialization) {
+ // No dynamic initialization to invoke.
+ } else if (InitIsInitFunc) {
if (Init) {
llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init);
if (isThreadWrapperReplaceable(VD, CGM)) {
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index ca06ad3..d1cd1b5 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -386,7 +386,9 @@
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
- bool usesThreadWrapperFunction() const override { return false; }
+ bool usesThreadWrapperFunction(const VarDecl *VD) const override {
+ return false;
+ }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;