Part of PR39508: Emit an @llvm.invariant.start after storing to
__tls_guard.

__tls_guard can only ever transition from 0 to 1, and only once. This
permits LLVM to remove repeated checks for TLS initialization and
repeated initialization code in cases like:

  int g();
  thread_local int n = g();
  int a = n + n;

where we could not prove that __tls_guard was still 'true' when checking
it for the second reference to 'n' in the initializer of 'a'.

llvm-svn: 345774
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index b7daba6..07fb4c0 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -124,22 +124,26 @@
 /// constant from this point onwards.
 static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D,
                               llvm::Constant *Addr) {
+  return CGF.EmitInvariantStart(
+      Addr, CGF.getContext().getTypeSizeInChars(D.getType()));
+}
+
+void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
   // Do not emit the intrinsic if we're not optimizing.
-  if (!CGF.CGM.getCodeGenOpts().OptimizationLevel)
+  if (!CGM.getCodeGenOpts().OptimizationLevel)
     return;
 
   // Grab the llvm.invariant.start intrinsic.
   llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
   // Overloaded address space type.
-  llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy};
-  llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr);
+  llvm::Type *ObjectPtr[1] = {Int8PtrTy};
+  llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);
 
   // Emit a call with the size in bytes of the object.
-  CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType());
-  uint64_t Width = WidthChars.getQuantity();
-  llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width),
-                           llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)};
-  CGF.Builder.CreateCall(InvariantStart, Args);
+  uint64_t Width = Size.getQuantity();
+  llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
+                           llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
+  Builder.CreateCall(InvariantStart, Args);
 }
 
 void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
@@ -607,7 +611,7 @@
 void
 CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
                                            ArrayRef<llvm::Function *> Decls,
-                                           Address Guard) {
+                                           ConstantAddress Guard) {
   {
     auto NL = ApplyDebugLocation::CreateEmpty(*this);
     StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -631,6 +635,12 @@
       // initializers use previously-initialized thread_local vars, that's
       // probably supposed to be OK, but the standard doesn't say.
       Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
+
+      // The guard variable can't ever change again.
+      EmitInvariantStart(
+          Guard.getPointer(),
+          CharUnits::fromQuantity(
+              CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
     }
 
     RunCleanupsScope Scope(*this);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c73cef5..d5de0a0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3892,6 +3892,8 @@
   AddInitializerToStaticVarDecl(const VarDecl &D,
                                 llvm::GlobalVariable *GV);
 
+  // Emit an @llvm.invariant.start call for the given memory region.
+  void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size);
 
   /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++
   /// variable with global storage.
@@ -3927,9 +3929,10 @@
 
   /// GenerateCXXGlobalInitFunc - Generates code for initializing global
   /// variables.
-  void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
-                                 ArrayRef<llvm::Function *> CXXThreadLocals,
-                                 Address Guard = Address::invalid());
+  void
+  GenerateCXXGlobalInitFunc(llvm::Function *Fn,
+                            ArrayRef<llvm::Function *> CXXThreadLocals,
+                            ConstantAddress Guard = ConstantAddress::invalid());
 
   /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
   /// variables.
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index a7e3c8d..b67a64b 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2472,8 +2472,8 @@
     CharUnits GuardAlign = CharUnits::One();
     Guard->setAlignment(GuardAlign.getQuantity());
 
-    CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits,
-                                                   Address(Guard, GuardAlign));
+    CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(
+        InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign));
     // On Darwin platforms, use CXX_FAST_TLS calling convention.
     if (CGM.getTarget().getTriple().isOSDarwin()) {
       InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);