[OpenCL] Remove block invoke function from emitted block literal struct

OpenCL runtime tracks the invoke function emitted for
any block expression. Due to restrictions on blocks in
OpenCL (v2.0 s6.12.5), it is always possible to know the
block invoke function when emitting call of block expression
or __enqueue_kernel builtin functions. Since __enqueu_kernel
already has an argument for the invoke function, it is redundant
to have invoke function member in the llvm block literal structure.

This patch removes invoke function from the llvm block literal
structure. It also removes the bitcast of block invoke function
to the generic block literal type which is useless for OpenCL.

This will save some space for the kernel argument, and also
eliminate some store instructions.

Differential Revision: https://reviews.llvm.org/D43783

llvm-svn: 326937
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 6695860..132314a 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -307,25 +307,12 @@
 
   assert(elementTypes.empty());
   if (CGM.getLangOpts().OpenCL) {
-    // The header is basically 'struct { int; int; generic void *;
+    // The header is basically 'struct { int; int;
     // custom_fields; }'. Assert that struct is packed.
-    auto GenericAS =
-        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
-    auto GenPtrAlign =
-        CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
-    auto GenPtrSize =
-        CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
-    assert(CGM.getIntSize() <= GenPtrSize);
-    assert(CGM.getIntAlign() <= GenPtrAlign);
-    assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
     elementTypes.push_back(CGM.IntTy); /* total size */
     elementTypes.push_back(CGM.IntTy); /* align */
-    elementTypes.push_back(
-        CGM.getOpenCLRuntime()
-            .getGenericVoidPointerType()); /* invoke function */
-    unsigned Offset =
-        2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
-    unsigned BlockAlign = GenPtrAlign.getQuantity();
+    unsigned Offset = 2 * CGM.getIntSize().getQuantity();
+    unsigned BlockAlign = CGM.getIntAlign().getQuantity();
     if (auto *Helper =
             CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
       for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -771,20 +758,12 @@
 
 llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
-  auto GenVoidPtrTy =
-      IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
-  LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
-  auto GenVoidPtrSize = CharUnits::fromQuantity(
-      CGM.getTarget().getPointerWidth(
-          CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
-      8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
   CodeGenFunction BlockCGF{CGM, true};
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
       CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
   if (blockInfo.CanBeGlobal)
@@ -853,11 +832,12 @@
           llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
           getIntSize(), "block.align");
     }
-    addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
-    if (!IsOpenCL)
+    if (!IsOpenCL) {
+      addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
+                     getPointerSize(), "block.invoke");
       addHeaderField(descriptor, getPointerSize(), "block.descriptor");
-    else if (auto *Helper =
-                 CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+    } else if (auto *Helper =
+                   CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
       for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
         addHeaderField(
             I.first,
@@ -1060,38 +1040,23 @@
 }
 
 llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
+  assert(!getLangOpts().OpenCL && "OpenCL does not need this");
+
   if (GenericBlockLiteralType)
     return GenericBlockLiteralType;
 
   llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
 
-  if (getLangOpts().OpenCL) {
-    // struct __opencl_block_literal_generic {
-    //   int __size;
-    //   int __align;
-    //   __generic void *__invoke;
-    //   /* custom fields */
-    // };
-    SmallVector<llvm::Type *, 8> StructFields(
-        {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
-    if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
-      for (auto I : Helper->getCustomFieldTypes())
-        StructFields.push_back(I);
-    }
-    GenericBlockLiteralType = llvm::StructType::create(
-        StructFields, "struct.__opencl_block_literal_generic");
-  } else {
-    // struct __block_literal_generic {
-    //   void *__isa;
-    //   int __flags;
-    //   int __reserved;
-    //   void (*__invoke)(void *);
-    //   struct __block_descriptor *__descriptor;
-    // };
-    GenericBlockLiteralType =
-        llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
-                                 IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
-  }
+  // struct __block_literal_generic {
+  //   void *__isa;
+  //   int __flags;
+  //   int __reserved;
+  //   void (*__invoke)(void *);
+  //   struct __block_descriptor *__descriptor;
+  // };
+  GenericBlockLiteralType =
+      llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+                               IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
 
   return GenericBlockLiteralType;
 }
@@ -1102,27 +1067,21 @@
     E->getCallee()->getType()->getAs<BlockPointerType>();
 
   llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
+  llvm::Value *FuncPtr;
 
-  // Get a pointer to the generic block literal.
-  // For OpenCL we generate generic AS void ptr to be able to reuse the same
-  // block definition for blocks with captures generated as private AS local
-  // variables and without captures generated as global AS program scope
-  // variables.
-  unsigned AddrSpace = 0;
-  if (getLangOpts().OpenCL)
-    AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+  if (!CGM.getLangOpts().OpenCL) {
+    // Get a pointer to the generic block literal.
+    llvm::Type *BlockLiteralTy =
+        llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
 
-  llvm::Type *BlockLiteralTy =
-      llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
+    // Bitcast the callee to a block literal.
+    BlockPtr =
+        Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
 
-  // Bitcast the callee to a block literal.
-  BlockPtr =
-      Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
-
-  // Get the function pointer from the literal.
-  llvm::Value *FuncPtr =
-      Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
-                              CGM.getLangOpts().OpenCL ? 2 : 3);
+    // Get the function pointer from the literal.
+    FuncPtr =
+        Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
+  }
 
   // Add the block literal.
   CallArgList Args;
@@ -1145,7 +1104,11 @@
   EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
 
   // Load the function.
-  llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+  llvm::Value *Func;
+  if (CGM.getLangOpts().OpenCL)
+    Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+  else
+    Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
 
   const FunctionType *FuncTy = FnType->castAs<FunctionType>();
   const CGFunctionInfo &FnInfo =
@@ -1254,14 +1217,14 @@
 
     // Reserved
     fields.addInt(CGM.IntTy, 0);
+
+    // Function
+    fields.add(blockFn);
   } else {
     fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
     fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
   }
 
-  // Function
-  fields.add(blockFn);
-
   if (!IsOpenCL) {
     // Descriptor
     fields.add(buildBlockDescriptor(CGM, blockInfo));
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 6d0089d..15a2221 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -112,6 +112,25 @@
       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
 }
 
+// Get the block literal from an expression derived from the block expression.
+// OpenCL v2.0 s6.12.5:
+// Block variable declarations are implicitly qualified with const. Therefore
+// all block variables must be initialized at declaration time and may not be
+// reassigned.
+static const BlockExpr *getBlockExpr(const Expr *E) {
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+    E = Cast->getSubExpr();
+  }
+  if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+    E = cast<VarDecl>(DR->getDecl())->getInit();
+  }
+  E = E->IgnoreImplicit();
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+    E = Cast->getSubExpr();
+  }
+  return cast<BlockExpr>(E);
+}
+
 /// Record emitted llvm invoke function and llvm block literal for the
 /// corresponding block expression.
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -126,21 +145,15 @@
   EnqueuedBlockMap[E].Kernel = nullptr;
 }
 
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
+  return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
+}
+
 CGOpenCLRuntime::EnqueuedBlockInfo
 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
   CGF.EmitScalarExpr(E);
 
-  // The block literal may be assigned to a const variable. Chasing down
-  // to get the block literal.
-  if (auto DR = dyn_cast<DeclRefExpr>(E)) {
-    E = cast<VarDecl>(DR->getDecl())->getInit();
-  }
-  E = E->IgnoreImplicit();
-  if (auto Cast = dyn_cast<CastExpr>(E)) {
-    E = Cast->getSubExpr();
-  }
-  auto *Block = cast<BlockExpr>(E);
-
+  const BlockExpr *Block = getBlockExpr(E);
   assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
          "Block expression not emitted");
 
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h
index c0088b5..14fd1f1 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.h
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.h
@@ -87,6 +87,10 @@
   /// \param Block block literal emitted for the block expression.
   void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
                        llvm::Value *Block);
+
+  /// \return LLVM block invoke function emitted for an expression derived from
+  /// the block expression.
+  llvm::Function *getInvokeFunction(const Expr *E);
 };
 
 }