[cuda][hip] Add CUDA builtin surface/texture reference support.

Summary: - Re-commit after fix Sema checks on partial template specialization.

Reviewers: tra, rjmccall, yaxunl, a.sidorin

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76365
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 5d8e545..ed02a7d 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -50,7 +50,7 @@
   struct VarInfo {
     llvm::GlobalVariable *Var;
     const VarDecl *D;
-    unsigned Flag;
+    DeviceVarFlags Flags;
   };
   llvm::SmallVector<VarInfo, 16> DeviceVars;
   /// Keeps track of variable containing handle of GPU binary. Populated by
@@ -124,8 +124,25 @@
 
   void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
   void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
-                         unsigned Flags) override {
-    DeviceVars.push_back({&Var, VD, Flags});
+                         bool Extern, bool Constant) override {
+    DeviceVars.push_back({&Var,
+                          VD,
+                          {DeviceVarFlags::Variable, Extern, Constant,
+                           /*Normalized*/ false, /*Type*/ 0}});
+  }
+  void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
+                          bool Extern, int Type) override {
+    DeviceVars.push_back({&Var,
+                          VD,
+                          {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
+                           /*Normalized*/ false, Type}});
+  }
+  void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
+                         bool Extern, int Type, bool Normalized) override {
+    DeviceVars.push_back({&Var,
+                          VD,
+                          {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
+                           Normalized, Type}});
   }
 
   /// Creates module constructor function
@@ -431,22 +448,55 @@
   llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(IntTy, RegisterVarParams, false),
       addUnderscoredPrefixToName("RegisterVar"));
+  // void __cudaRegisterSurface(void **, const struct surfaceReference *,
+  //                            const void **, const char *, int, int);
+  llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(
+          VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
+          false),
+      addUnderscoredPrefixToName("RegisterSurface"));
+  // void __cudaRegisterTexture(void **, const struct textureReference *,
+  //                            const void **, const char *, int, int, int)
+  llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(
+          VoidTy,
+          {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
+          false),
+      addUnderscoredPrefixToName("RegisterTexture"));
   for (auto &&Info : DeviceVars) {
     llvm::GlobalVariable *Var = Info.Var;
-    unsigned Flags = Info.Flag;
     llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
-    uint64_t VarSize =
-        CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
-    llvm::Value *Args[] = {
-        &GpuBinaryHandlePtr,
-        Builder.CreateBitCast(Var, VoidPtrTy),
-        VarName,
-        VarName,
-        llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
-        llvm::ConstantInt::get(IntTy, VarSize),
-        llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
-        llvm::ConstantInt::get(IntTy, 0)};
-    Builder.CreateCall(RegisterVar, Args);
+    switch (Info.Flags.Kind) {
+    case DeviceVarFlags::Variable: {
+      uint64_t VarSize =
+          CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
+      llvm::Value *Args[] = {&GpuBinaryHandlePtr,
+                             Builder.CreateBitCast(Var, VoidPtrTy),
+                             VarName,
+                             VarName,
+                             llvm::ConstantInt::get(IntTy, Info.Flags.Extern),
+                             llvm::ConstantInt::get(IntTy, VarSize),
+                             llvm::ConstantInt::get(IntTy, Info.Flags.Constant),
+                             llvm::ConstantInt::get(IntTy, 0)};
+      Builder.CreateCall(RegisterVar, Args);
+      break;
+    }
+    case DeviceVarFlags::Surface:
+      Builder.CreateCall(
+          RegisterSurf,
+          {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+           VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType),
+           llvm::ConstantInt::get(IntTy, Info.Flags.Extern)});
+      break;
+    case DeviceVarFlags::Texture:
+      Builder.CreateCall(
+          RegisterTex,
+          {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+           VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType),
+           llvm::ConstantInt::get(IntTy, Info.Flags.Normalized),
+           llvm::ConstantInt::get(IntTy, Info.Flags.Extern)});
+      break;
+    }
   }
 
   Builder.CreateRetVoid();
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 330e950..b261324 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -42,9 +42,17 @@
 
 public:
   // Global variable properties that must be passed to CUDA runtime.
-  enum DeviceVarFlags {
-    ExternDeviceVar = 0x01,   // extern
-    ConstantDeviceVar = 0x02, // __constant__
+  struct DeviceVarFlags {
+    enum DeviceVarKind : unsigned {
+      Variable, // Variable
+      Surface,  // Builtin surface
+      Texture,  // Builtin texture
+    };
+    DeviceVarKind Kind : 2;
+    unsigned Extern : 1;
+    unsigned Constant : 1;   // Constant variable.
+    unsigned Normalized : 1; // Normalized texture.
+    int SurfTexType;         // Type of surface/texutre.
   };
 
   CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
@@ -57,7 +65,11 @@
   /// Emits a kernel launch stub.
   virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
   virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
-                                 unsigned Flags) = 0;
+                                 bool Extern, bool Constant) = 0;
+  virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
+                                  bool Extern, int Type) = 0;
+  virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
+                                 bool Extern, int Type, bool Normalized) = 0;
 
   /// Constructs and returns a module initialization function or nullptr if it's
   /// not needed. Must be called after all kernels have been emitted.
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index df576de..fa2d228 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -15,6 +15,7 @@
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclCXX.h"
@@ -1946,6 +1947,18 @@
     }
   }
 
+  if (getLangOpts().CUDAIsDevice) {
+    if (Ty->isCUDADeviceBuiltinSurfaceType()) {
+      if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest,
+                                                                  Src))
+        return;
+    } else if (Ty->isCUDADeviceBuiltinTextureType()) {
+      if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest,
+                                                                  Src))
+        return;
+    }
+  }
+
   // Aggregate assignment turns into llvm.memcpy.  This is almost valid per
   // C99 6.5.16.1p3, which states "If the value being stored in an object is
   // read from another object that overlaps in anyway the storage of the first
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index b91c38e..963638c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -713,6 +713,19 @@
 TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
   if (!TBAA)
     return TBAAAccessInfo();
+  if (getLangOpts().CUDAIsDevice) {
+    // As CUDA builtin surface/texture types are replaced, skip generating TBAA
+    // access info.
+    if (AccessType->isCUDADeviceBuiltinSurfaceType()) {
+      if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() !=
+          nullptr)
+        return TBAAAccessInfo();
+    } else if (AccessType->isCUDADeviceBuiltinTextureType()) {
+      if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() !=
+          nullptr)
+        return TBAAAccessInfo();
+    }
+  }
   return TBAA->getAccessInfo(AccessType);
 }
 
@@ -2507,7 +2520,9 @@
           !Global->hasAttr<CUDAGlobalAttr>() &&
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
-          !(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>()))
+          !(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>()) &&
+          !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
+          !Global->getType()->isCUDADeviceBuiltinTextureType())
         return;
     } else {
       // We need to emit host-side 'shadows' for all global
@@ -3907,12 +3922,16 @@
       !getLangOpts().CUDAIsDevice &&
       (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
        D->hasAttr<CUDASharedAttr>());
+  bool IsCUDADeviceShadowVar =
+      getLangOpts().CUDAIsDevice &&
+      (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+       D->getType()->isCUDADeviceBuiltinTextureType());
   // HIP pinned shadow of initialized host-side global variables are also
   // left undefined.
   bool IsHIPPinnedShadowVar =
       getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>();
-  if (getLangOpts().CUDA &&
-      (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar))
+  if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar ||
+                             IsCUDADeviceShadowVar || IsHIPPinnedShadowVar))
     Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
   else if (D->hasAttr<LoaderUninitializedAttr>())
     Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
@@ -4023,25 +4042,48 @@
       if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
           D->hasAttr<HIPPinnedShadowAttr>()) {
         Linkage = llvm::GlobalValue::InternalLinkage;
-
-        // Shadow variables and their properties must be registered
-        // with CUDA runtime.
-        unsigned Flags = 0;
-        if (!D->hasDefinition())
-          Flags |= CGCUDARuntime::ExternDeviceVar;
-        if (D->hasAttr<CUDAConstantAttr>())
-          Flags |= CGCUDARuntime::ConstantDeviceVar;
-        // Extern global variables will be registered in the TU where they are
-        // defined.
+        // Shadow variables and their properties must be registered with CUDA
+        // runtime. Skip Extern global variables, which will be registered in
+        // the TU where they are defined.
         if (!D->hasExternalStorage())
-          getCUDARuntime().registerDeviceVar(D, *GV, Flags);
-      } else if (D->hasAttr<CUDASharedAttr>())
+          getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
+                                             D->hasAttr<CUDAConstantAttr>());
+      } else if (D->hasAttr<CUDASharedAttr>()) {
         // __shared__ variables are odd. Shadows do get created, but
         // they are not registered with the CUDA runtime, so they
         // can't really be used to access their device-side
         // counterparts. It's not clear yet whether it's nvcc's bug or
         // a feature, but we've got to do the same for compatibility.
         Linkage = llvm::GlobalValue::InternalLinkage;
+      } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+                 D->getType()->isCUDADeviceBuiltinTextureType()) {
+        // Builtin surfaces and textures and their template arguments are
+        // also registered with CUDA runtime.
+        Linkage = llvm::GlobalValue::InternalLinkage;
+        const ClassTemplateSpecializationDecl *TD =
+            cast<ClassTemplateSpecializationDecl>(
+                D->getType()->getAs<RecordType>()->getDecl());
+        const TemplateArgumentList &Args = TD->getTemplateArgs();
+        if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
+          assert(Args.size() == 2 &&
+                 "Unexpected number of template arguments of CUDA device "
+                 "builtin surface type.");
+          auto SurfType = Args[1].getAsIntegral();
+          if (!D->hasExternalStorage())
+            getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
+                                                SurfType.getSExtValue());
+        } else {
+          assert(Args.size() == 3 &&
+                 "Unexpected number of template arguments of CUDA device "
+                 "builtin texture type.");
+          auto TexType = Args[1].getAsIntegral();
+          auto Normalized = Args[2].getAsIntegral();
+          if (!D->hasExternalStorage())
+            getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
+                                               TexType.getSExtValue(),
+                                               Normalized.getZExtValue());
+        }
+      }
     }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 31eca16..befd80d 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -383,6 +383,20 @@
 
   const Type *Ty = T.getTypePtr();
 
+  // For the device-side compilation, CUDA device builtin surface/texture types
+  // may be represented in different types.
+  if (Context.getLangOpts().CUDAIsDevice) {
+    if (T->isCUDADeviceBuiltinSurfaceType()) {
+      if (auto *Ty = CGM.getTargetCodeGenInfo()
+                         .getCUDADeviceBuiltinSurfaceDeviceType())
+        return Ty;
+    } else if (T->isCUDADeviceBuiltinTextureType()) {
+      if (auto *Ty = CGM.getTargetCodeGenInfo()
+                         .getCUDADeviceBuiltinTextureDeviceType())
+        return Ty;
+    }
+  }
+
   // RecordTypes are cached and processed specially.
   if (const RecordType *RT = dyn_cast<RecordType>(Ty))
     return ConvertRecordDeclType(RT->getDecl());
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 2b96cc4..e64fe4f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm> // std::sort
@@ -6414,9 +6415,14 @@
 
 namespace {
 
+class NVPTXTargetCodeGenInfo;
+
 class NVPTXABIInfo : public ABIInfo {
+  NVPTXTargetCodeGenInfo &CGInfo;
+
 public:
-  NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
+      : ABIInfo(CGT), CGInfo(Info) {}
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -6429,16 +6435,61 @@
 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
-    : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
+      : TargetCodeGenInfo(new NVPTXABIInfo(CGT, *this)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   bool shouldEmitStaticExternCAliases() const override;
 
+  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
+    // On the device side, surface reference is represented as an object handle
+    // in 64-bit integer.
+    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+  }
+
+  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
+    // On the device side, texture reference is represented as an object handle
+    // in 64-bit integer.
+    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+  }
+
+  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+                                              LValue Src) const override {
+    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+    return true;
+  }
+
+  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+                                              LValue Src) const override {
+    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+    return true;
+  }
+
 private:
-  // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
+  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
   // resulting MDNode to the nvvm.annotations MDNode.
-  static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand);
+  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+                              int Operand);
+
+  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+                                           LValue Src) {
+    llvm::Value *Handle = nullptr;
+    llvm::Constant *C =
+        llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
+    // Lookup `addrspacecast` through the constant pointer if any.
+    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
+      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
+    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
+      // Load the handle from the specific global variable using
+      // `nvvm.texsurf.handle.internal` intrinsic.
+      Handle = CGF.EmitRuntimeCall(
+          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
+                               {GV->getType()}),
+          {GV}, "texsurf_handle");
+    } else
+      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
+    CGF.EmitStoreOfScalar(Handle, Dst);
+  }
 };
 
 /// Checks if the type is unsupported directly by the current target.
@@ -6511,8 +6562,19 @@
     Ty = EnumTy->getDecl()->getIntegerType();
 
   // Return aggregates type as indirect by value
-  if (isAggregateTypeForABI(Ty))
+  if (isAggregateTypeForABI(Ty)) {
+    // Under CUDA device compilation, tex/surf builtin types are replaced with
+    // object types and passed directly.
+    if (getContext().getLangOpts().CUDAIsDevice) {
+      if (Ty->isCUDADeviceBuiltinSurfaceType())
+        return ABIArgInfo::getDirect(
+            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
+      if (Ty->isCUDADeviceBuiltinTextureType())
+        return ABIArgInfo::getDirect(
+            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
+    }
     return getNaturalAlignIndirect(Ty, /* byval */ true);
+  }
 
   return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
                                         : ABIArgInfo::getDirect());
@@ -6540,6 +6602,17 @@
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
   if (GV->isDeclaration())
     return;
+  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
+  if (VD) {
+    if (M.getLangOpts().CUDA) {
+      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
+        addNVVMMetadata(GV, "surface", 1);
+      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
+        addNVVMMetadata(GV, "texture", 1);
+      return;
+    }
+  }
+
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD) return;
 
@@ -6588,16 +6661,16 @@
   }
 }
 
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,
-                                             int Operand) {
-  llvm::Module *M = F->getParent();
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+                                             StringRef Name, int Operand) {
+  llvm::Module *M = GV->getParent();
   llvm::LLVMContext &Ctx = M->getContext();
 
   // Get "nvvm.annotations" metadata node
   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
 
   llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name),
+      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
       llvm::ConstantAsMetadata::get(
           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
   // Append metadata to nvvm.annotations
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index e1e90e7..e7c842b 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -315,6 +315,32 @@
   virtual bool shouldEmitStaticExternCAliases() const { return true; }
 
   virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
+
+  /// Return the device-side type for the CUDA device builtin surface type.
+  virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
+    // By default, no change from the original one.
+    return nullptr;
+  }
+  /// Return the device-side type for the CUDA device builtin texture type.
+  virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const {
+    // By default, no change from the original one.
+    return nullptr;
+  }
+
+  /// Emit the device-side copy of the builtin surface type.
+  virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
+                                                      LValue Dst,
+                                                      LValue Src) const {
+    // DO NOTHING by default.
+    return false;
+  }
+  /// Emit the device-side copy of the builtin texture type.
+  virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF,
+                                                      LValue Dst,
+                                                      LValue Src) const {
+    // DO NOTHING by default.
+    return false;
+  }
 };
 
 } // namespace CodeGen