blob: 3798b8cf73134b3610f277827229bf0e4a96f37f [file] [log] [blame]
Artem Belevich36090852016-03-02 21:03:20 +00001// RUN: echo "GPU binary would be here" > %t
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00002// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
3// RUN: -fcuda-include-gpubinary %t -o - \
Yaxun Liu29155b02018-05-18 15:07:56 +00004// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00005// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
6// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
Yaxun Liu29155b02018-05-18 15:07:56 +00007// RUN: | FileCheck %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00008// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
9// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \
Yaxun Liu29155b02018-05-18 15:07:56 +000010// RUN: | FileCheck %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000011// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
12// RUN: | FileCheck %s -check-prefix=NOGPUBIN
Peter Collingbournefa4d6032011-10-06 18:51:56 +000013
Yaxun Liu887c5692018-04-25 01:10:37 +000014// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
15// RUN: -fcuda-include-gpubinary %t -o - -x hip\
16// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,HIP
17// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
18// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
Yaxun Liu29155b02018-05-18 15:07:56 +000019// RUN: | FileCheck %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
Yaxun Liu887c5692018-04-25 01:10:37 +000020// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
21// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \
Yaxun Liu29155b02018-05-18 15:07:56 +000022// RUN: | FileCheck %s --check-prefixes=ALL,RDC,HIP,HIPRDC
Yaxun Liu887c5692018-04-25 01:10:37 +000023// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
24// RUN: | FileCheck %s -check-prefix=NOGPUBIN
25
Eli Bendersky3468d9d2014-04-28 22:21:28 +000026#include "Inputs/cuda.h"
Peter Collingbournefa4d6032011-10-06 18:51:56 +000027
Artem Belevich8c1ec1e2016-03-02 18:28:53 +000028#ifndef NOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000029// ALL-DAG: @device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000030__device__ int device_var;
31
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000032// ALL-DAG: @constant_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000033__constant__ int constant_var;
34
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000035// ALL-DAG: @shared_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000036__shared__ int shared_var;
37
38// Make sure host globals don't get internalized...
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000039// ALL-DAG: @host_var = global i32
Artem Belevich42e19492016-03-02 18:28:50 +000040int host_var;
41// ... and that extern vars remain external.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000042// ALL-DAG: @ext_host_var = external global i32
Artem Belevich42e19492016-03-02 18:28:50 +000043extern int ext_host_var;
44
45// Shadows for external device-side variables are *definitions* of
46// those variables.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000047// ALL-DAG: @ext_device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000048extern __device__ int ext_device_var;
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000049// ALL-DAG: @ext_device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000050extern __constant__ int ext_constant_var;
51
52void use_pointers() {
53 int *p;
54 p = &device_var;
55 p = &constant_var;
56 p = &shared_var;
57 p = &host_var;
58 p = &ext_device_var;
59 p = &ext_constant_var;
60 p = &ext_host_var;
61}
62
Artem Belevich52cc4872015-05-07 19:34:16 +000063// Make sure that all parts of GPU code init/cleanup are there:
64// * constant unnamed string with the kernel name
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000065// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
Artem Belevich52cc4872015-05-07 19:34:16 +000066// * constant unnamed string with GPU binary
Yaxun Liu29155b02018-05-18 15:07:56 +000067// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +000068// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
Yaxun Liu29155b02018-05-18 15:07:56 +000069// CUDANORDC-SAME: section ".nv_fatbin", align 8
70// CUDARDC-SAME: section "__nv_relfatbin", align 8
Artem Belevich52cc4872015-05-07 19:34:16 +000071// * constant struct that wraps GPU binary
Yaxun Liu29155b02018-05-18 15:07:56 +000072// ALL: @__[[PREFIX:cuda|hip]]_fatbin_wrapper = internal constant
73// ALL-SAME: { i32, i32, i8*, i8* }
74// CUDA-SAME: { i32 1180844977, i32 1,
75// HIP-SAME: { i32 1212764230, i32 1,
76// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
77// HIP-SAME: i8* @[[FATBIN]],
78// ALL-SAME: i8* null }
79// CUDA-SAME: section ".nvFatBinSegment"
80// HIP-SAME: section ".hipFatBinSegment"
Artem Belevich52cc4872015-05-07 19:34:16 +000081// * variable to save GPU binary handle after initialization
Yaxun Liu887c5692018-04-25 01:10:37 +000082// NORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000083// * constant unnamed string with NVModuleID
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +000084// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
Yaxun Liu29155b02018-05-18 15:07:56 +000085// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
86// HIPRDC-SAME: c"[[MODULE_ID:.+]]\00", section "__hip_module_id", align 32
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000087// * Make sure our constructor was added to global ctor list.
Yaxun Liu887c5692018-04-25 01:10:37 +000088// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000089// * In separate mode we also register a destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +000090// NORDC: @llvm.global_dtors = appending global {{.*}}@__[[PREFIX]]_module_dtor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000091// * Alias to global symbol containing the NVModuleID.
92// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
Yaxun Liu887c5692018-04-25 01:10:37 +000093// RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
Artem Belevich52cc4872015-05-07 19:34:16 +000094
Peter Collingbournefa4d6032011-10-06 18:51:56 +000095// Test that we build the correct number of calls to cudaSetupArgument followed
96// by a call to cudaLaunch.
97
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000098// ALL: define{{.*}}kernelfunc
Yaxun Liu887c5692018-04-25 01:10:37 +000099// ALL: call{{.*}}[[PREFIX]]SetupArgument
100// ALL: call{{.*}}[[PREFIX]]SetupArgument
101// ALL: call{{.*}}[[PREFIX]]SetupArgument
102// ALL: call{{.*}}[[PREFIX]]Launch
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000103__global__ void kernelfunc(int i, int j, int k) {}
Artem Belevich52cc4872015-05-07 19:34:16 +0000104
105// Test that we've built correct kernel launch sequence.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000106// ALL: define{{.*}}hostfunc
Yaxun Liu887c5692018-04-25 01:10:37 +0000107// ALL: call{{.*}}[[PREFIX]]ConfigureCall
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000108// ALL: call{{.*}}kernelfunc
Artem Belevich52cc4872015-05-07 19:34:16 +0000109void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000110#endif
Artem Belevich52cc4872015-05-07 19:34:16 +0000111
Artem Belevich42e19492016-03-02 18:28:50 +0000112// Test that we've built a function to register kernels and global vars.
Yaxun Liu887c5692018-04-25 01:10:37 +0000113// ALL: define internal void @__[[PREFIX]]_register_globals
114// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
115// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
116// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
117// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
118// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000119// ALL: ret void
Artem Belevich52cc4872015-05-07 19:34:16 +0000120
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000121// Test that we've built a constructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000122// ALL: define internal void @__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000123
Yaxun Liu887c5692018-04-25 01:10:37 +0000124// In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
125// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
126// .. stores return value in __[[PREFIX]]_gpubin_handle
127// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
128// .. and then calls __[[PREFIX]]_register_globals
129// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000130
Yaxun Liu887c5692018-04-25 01:10:37 +0000131// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
132// RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
133// RDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000134// RDC-SAME: [[MODULE_ID_GLOBAL]]
Artem Belevich52cc4872015-05-07 19:34:16 +0000135
136// Test that we've created destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000137// NORDC: define internal void @__[[PREFIX]]_module_dtor
138// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
139// NORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich52cc4872015-05-07 19:34:16 +0000140
Yaxun Liu887c5692018-04-25 01:10:37 +0000141// There should be no __[[PREFIX]]_register_globals if we have no
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000142// device-side globals, but we still need to register GPU binary.
143// Skip GPU binary string first.
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +0000144// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
Yaxun Liu29155b02018-05-18 15:07:56 +0000145// HIPNOGLOBALS: @{{.*}} = external constant{{.*}}
Yaxun Liu887c5692018-04-25 01:10:37 +0000146// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
Yaxun Liu29155b02018-05-18 15:07:56 +0000147// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
Yaxun Liu887c5692018-04-25 01:10:37 +0000148// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
149// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
150// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
151// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000152
153// There should be no constructors/destructors if we have no GPU binary.
Yaxun Liu887c5692018-04-25 01:10:37 +0000154// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
155// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor
156// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor