blob: 8339d872ad95b465d87db5304d283fa9d77378ce [file] [log] [blame]
Artem Belevich36090852016-03-02 21:03:20 +00001// RUN: echo "GPU binary would be here" > %t
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00002// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
3// RUN: -fcuda-include-gpubinary %t -o - \
Yaxun Liu887c5692018-04-25 01:10:37 +00004// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,CUDA
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00005// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
6// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
Artem Belevich8c1ec1e2016-03-02 18:28:53 +00007// RUN: | FileCheck %s -check-prefix=NOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00008// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
9// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \
Yaxun Liu887c5692018-04-25 01:10:37 +000010// RUN: | FileCheck %s --check-prefixes=ALL,RDC,CUDA
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000011// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
12// RUN: | FileCheck %s -check-prefix=NOGPUBIN
Peter Collingbournefa4d6032011-10-06 18:51:56 +000013
Yaxun Liu887c5692018-04-25 01:10:37 +000014// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
15// RUN: -fcuda-include-gpubinary %t -o - -x hip\
16// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,HIP
17// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
18// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
19// RUN: | FileCheck %s -check-prefix=NOGLOBALS
20// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
21// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \
22// RUN: | FileCheck %s --check-prefixes=ALL,RDC,HIP
23// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
24// RUN: | FileCheck %s -check-prefix=NOGPUBIN
25
Eli Bendersky3468d9d2014-04-28 22:21:28 +000026#include "Inputs/cuda.h"
Peter Collingbournefa4d6032011-10-06 18:51:56 +000027
Artem Belevich8c1ec1e2016-03-02 18:28:53 +000028#ifndef NOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000029// ALL-DAG: @device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000030__device__ int device_var;
31
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000032// ALL-DAG: @constant_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000033__constant__ int constant_var;
34
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000035// ALL-DAG: @shared_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000036__shared__ int shared_var;
37
38// Make sure host globals don't get internalized...
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000039// ALL-DAG: @host_var = global i32
Artem Belevich42e19492016-03-02 18:28:50 +000040int host_var;
41// ... and that extern vars remain external.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000042// ALL-DAG: @ext_host_var = external global i32
Artem Belevich42e19492016-03-02 18:28:50 +000043extern int ext_host_var;
44
45// Shadows for external device-side variables are *definitions* of
46// those variables.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000047// ALL-DAG: @ext_device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000048extern __device__ int ext_device_var;
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000049// ALL-DAG: @ext_device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000050extern __constant__ int ext_constant_var;
51
52void use_pointers() {
53 int *p;
54 p = &device_var;
55 p = &constant_var;
56 p = &shared_var;
57 p = &host_var;
58 p = &ext_device_var;
59 p = &ext_constant_var;
60 p = &ext_host_var;
61}
62
Artem Belevich52cc4872015-05-07 19:34:16 +000063// Make sure that all parts of GPU code init/cleanup are there:
64// * constant unnamed string with the kernel name
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000065// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
Artem Belevich52cc4872015-05-07 19:34:16 +000066// * constant unnamed string with GPU binary
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000067// ALL: private unnamed_addr constant{{.*GPU binary would be here.*}}\00"
68// NORDC-SAME: section ".nv_fatbin", align 8
69// RDC-SAME: section "__nv_relfatbin", align 8
Artem Belevich52cc4872015-05-07 19:34:16 +000070// * constant struct that wraps GPU binary
Yaxun Liu887c5692018-04-25 01:10:37 +000071// CUDA: @__[[PREFIX:cuda]]_fatbin_wrapper = internal constant
72// CUDA-SAME: { i32, i32, i8*, i8* }
73// HIP: @__[[PREFIX:hip]]_fatbin_wrapper = internal constant
74// HIP-SAME: { i32, i32, i8*, i8* }
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000075// ALL-SAME: { i32 1180844977, i32 1, {{.*}}, i8* null }
76// ALL-SAME: section ".nvFatBinSegment"
Artem Belevich52cc4872015-05-07 19:34:16 +000077// * variable to save GPU binary handle after initialization
Yaxun Liu887c5692018-04-25 01:10:37 +000078// NORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000079// * constant unnamed string with NVModuleID
80// RDC: [[MODULE_ID_GLOBAL:@.*]] = private unnamed_addr constant
81// RDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
82// * Make sure our constructor was added to global ctor list.
Yaxun Liu887c5692018-04-25 01:10:37 +000083// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000084// * In separate mode we also register a destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +000085// NORDC: @llvm.global_dtors = appending global {{.*}}@__[[PREFIX]]_module_dtor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000086// * Alias to global symbol containing the NVModuleID.
87// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
Yaxun Liu887c5692018-04-25 01:10:37 +000088// RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
Artem Belevich52cc4872015-05-07 19:34:16 +000089
Peter Collingbournefa4d6032011-10-06 18:51:56 +000090// Test that we build the correct number of calls to cudaSetupArgument followed
91// by a call to cudaLaunch.
92
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000093// ALL: define{{.*}}kernelfunc
Yaxun Liu887c5692018-04-25 01:10:37 +000094// ALL: call{{.*}}[[PREFIX]]SetupArgument
95// ALL: call{{.*}}[[PREFIX]]SetupArgument
96// ALL: call{{.*}}[[PREFIX]]SetupArgument
97// ALL: call{{.*}}[[PREFIX]]Launch
Peter Collingbournefa4d6032011-10-06 18:51:56 +000098__global__ void kernelfunc(int i, int j, int k) {}
Artem Belevich52cc4872015-05-07 19:34:16 +000099
100// Test that we've built correct kernel launch sequence.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000101// ALL: define{{.*}}hostfunc
Yaxun Liu887c5692018-04-25 01:10:37 +0000102// ALL: call{{.*}}[[PREFIX]]ConfigureCall
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000103// ALL: call{{.*}}kernelfunc
Artem Belevich52cc4872015-05-07 19:34:16 +0000104void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000105#endif
Artem Belevich52cc4872015-05-07 19:34:16 +0000106
Artem Belevich42e19492016-03-02 18:28:50 +0000107// Test that we've built a function to register kernels and global vars.
Yaxun Liu887c5692018-04-25 01:10:37 +0000108// ALL: define internal void @__[[PREFIX]]_register_globals
109// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
110// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
111// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
112// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
113// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000114// ALL: ret void
Artem Belevich52cc4872015-05-07 19:34:16 +0000115
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000116// Test that we've built a constructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000117// ALL: define internal void @__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000118
Yaxun Liu887c5692018-04-25 01:10:37 +0000119// In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
120// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
121// .. stores return value in __[[PREFIX]]_gpubin_handle
122// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
123// .. and then calls __[[PREFIX]]_register_globals
124// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000125
Yaxun Liu887c5692018-04-25 01:10:37 +0000126// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
127// RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
128// RDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000129// RDC-SAME: [[MODULE_ID_GLOBAL]]
Artem Belevich52cc4872015-05-07 19:34:16 +0000130
131// Test that we've created destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000132// NORDC: define internal void @__[[PREFIX]]_module_dtor
133// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
134// NORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich52cc4872015-05-07 19:34:16 +0000135
Yaxun Liu887c5692018-04-25 01:10:37 +0000136// There should be no __[[PREFIX]]_register_globals if we have no
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000137// device-side globals, but we still need to register GPU binary.
138// Skip GPU binary string first.
139// NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
Yaxun Liu887c5692018-04-25 01:10:37 +0000140// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
141// NOGLOBALS: define internal void @__[[PREFIX:.*]]_module_ctor
142// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
143// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
144// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
145// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000146
147// There should be no constructors/destructors if we have no GPU binary.
Yaxun Liu887c5692018-04-25 01:10:37 +0000148// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
149// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor
150// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor