blob: 30f88389424d0c962c2379391c9f1e5b2feaa16c [file] [log] [blame]
Artem Belevich36090852016-03-02 21:03:20 +00001// RUN: echo "GPU binary would be here" > %t
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00002// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +00003// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
4// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
5// RUN: --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-OLD
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00006// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +00007// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t \
8// RUN: -o - -DNOGLOBALS \
9// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
10// RUN: -check-prefixes=NOGLOBALS,CUDANOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000011// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +000012// RUN: -target-sdk-version=8.0 -fgpu-rdc -fcuda-include-gpubinary %t \
13// RUN: -o - \
14// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
15// RUN: --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA-OLD
16// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
17// RUN: -target-sdk-version=8.0 -o - \
18// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
19
20// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
21// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
22// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
23// RUN: --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-NEW
24// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
25// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
26// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
27// RUN: --check-prefixes=NOGLOBALS,CUDANOGLOBALS
28// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
29// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
30// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
31// RUN: --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA_NEW
32// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
33// RUN: -target-sdk-version=9.2 -o - \
Joel E. Denny72c27832018-07-11 20:26:20 +000034// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
Peter Collingbournefa4d6032011-10-06 18:51:56 +000035
Yaxun Liu887c5692018-04-25 01:10:37 +000036// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
37// RUN: -fcuda-include-gpubinary %t -o - -x hip\
Yaxun Liu97670892018-10-02 17:48:54 +000038// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
Yaxun Liu887c5692018-04-25 01:10:37 +000039// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
40// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
Joel E. Denny72c27832018-07-11 20:26:20 +000041// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
Yaxun Liu887c5692018-04-25 01:10:37 +000042// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Yaxun Liu97670892018-10-02 17:48:54 +000043// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
44// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
Yaxun Liu887c5692018-04-25 01:10:37 +000045// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
Yaxun Liu97670892018-10-02 17:48:54 +000046// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF
Yaxun Liu887c5692018-04-25 01:10:37 +000047
Eli Bendersky3468d9d2014-04-28 22:21:28 +000048#include "Inputs/cuda.h"
Peter Collingbournefa4d6032011-10-06 18:51:56 +000049
Artem Belevich8c1ec1e2016-03-02 18:28:53 +000050#ifndef NOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000051// ALL-DAG: @device_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000052__device__ int device_var;
53
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000054// ALL-DAG: @constant_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000055__constant__ int constant_var;
56
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000057// ALL-DAG: @shared_var = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000058__shared__ int shared_var;
59
60// Make sure host globals don't get internalized...
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000061// ALL-DAG: @host_var = global i32
Artem Belevich42e19492016-03-02 18:28:50 +000062int host_var;
63// ... and that extern vars remain external.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000064// ALL-DAG: @ext_host_var = external global i32
Artem Belevich42e19492016-03-02 18:28:50 +000065extern int ext_host_var;
66
Artem Belevich99535772018-12-22 01:11:09 +000067// external device-side variables -> extern references to their shadows.
68// ALL-DAG: @ext_device_var = external global i32
Artem Belevich42e19492016-03-02 18:28:50 +000069extern __device__ int ext_device_var;
Artem Belevich99535772018-12-22 01:11:09 +000070// ALL-DAG: @ext_device_var = external global i32
Artem Belevich42e19492016-03-02 18:28:50 +000071extern __constant__ int ext_constant_var;
72
Artem Belevich99535772018-12-22 01:11:09 +000073// external device-side variables with definitions should generate
74// definitions for the shadows.
75// ALL-DAG: @ext_device_var_def = internal global i32 undef,
76extern __device__ int ext_device_var_def;
77__device__ int ext_device_var_def = 1;
78// ALL-DAG: @ext_device_var_def = internal global i32 undef,
79__constant__ int ext_constant_var_def = 2;
80
Artem Belevich42e19492016-03-02 18:28:50 +000081void use_pointers() {
82 int *p;
83 p = &device_var;
84 p = &constant_var;
85 p = &shared_var;
86 p = &host_var;
87 p = &ext_device_var;
88 p = &ext_constant_var;
89 p = &ext_host_var;
90}
91
Artem Belevich52cc4872015-05-07 19:34:16 +000092// Make sure that all parts of GPU code init/cleanup are there:
93// * constant unnamed string with the kernel name
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000094// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
Artem Belevich52cc4872015-05-07 19:34:16 +000095// * constant unnamed string with GPU binary
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +000096// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
Yaxun Liu97670892018-10-02 17:48:54 +000097// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
98// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
Yaxun Liu29155b02018-05-18 15:07:56 +000099// CUDANORDC-SAME: section ".nv_fatbin", align 8
100// CUDARDC-SAME: section "__nv_relfatbin", align 8
Artem Belevich52cc4872015-05-07 19:34:16 +0000101// * constant struct that wraps GPU binary
Yaxun Liu29155b02018-05-18 15:07:56 +0000102// ALL: @__[[PREFIX:cuda|hip]]_fatbin_wrapper = internal constant
103// ALL-SAME: { i32, i32, i8*, i8* }
104// CUDA-SAME: { i32 1180844977, i32 1,
105// HIP-SAME: { i32 1212764230, i32 1,
106// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
Yaxun Liu97670892018-10-02 17:48:54 +0000107// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
108// HIPNEF-SAME: i8* @[[FATBIN]],
Yaxun Liu29155b02018-05-18 15:07:56 +0000109// ALL-SAME: i8* null }
110// CUDA-SAME: section ".nvFatBinSegment"
111// HIP-SAME: section ".hipFatBinSegment"
Artem Belevich52cc4872015-05-07 19:34:16 +0000112// * variable to save GPU binary handle after initialization
Yaxun Liuf99752b2018-07-20 22:45:24 +0000113// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
Yaxun Liu97670892018-10-02 17:48:54 +0000114// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000115// * constant unnamed string with NVModuleID
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +0000116// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
Yaxun Liu29155b02018-05-18 15:07:56 +0000117// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000118// * Make sure our constructor was added to global ctor list.
Yaxun Liu887c5692018-04-25 01:10:37 +0000119// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000120// * Alias to global symbol containing the NVModuleID.
121// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
Yaxun Liu887c5692018-04-25 01:10:37 +0000122// RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
Artem Belevich52cc4872015-05-07 19:34:16 +0000123
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000124// Test that we build the correct number of calls to cudaSetupArgument followed
125// by a call to cudaLaunch.
126
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000127// ALL: define{{.*}}kernelfunc
Artem Belevichc62214d2019-01-31 21:34:03 +0000128
129// New launch sequence stores arguments into local buffer and passes array of
130// pointers to them directly to cudaLaunchKernel
131// CUDA-NEW: alloca
132// CUDA-NEW: store
133// CUDA-NEW: store
134// CUDA-NEW: store
135// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
136// CUDA-NEW: call{{.*}}cudaLaunchKernel
137
138// Legacy style launch sequence sets up arguments by passing them to
139// [cuda|hip]SetupArgument.
140// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
141// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
142// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
143// CUDA-OLD: call{{.*}}[[PREFIX]]Launch
144
145// HIP: call{{.*}}[[PREFIX]]SetupArgument
146// HIP: call{{.*}}[[PREFIX]]SetupArgument
147// HIP: call{{.*}}[[PREFIX]]SetupArgument
148// HIP: call{{.*}}[[PREFIX]]Launch
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000149__global__ void kernelfunc(int i, int j, int k) {}
Artem Belevich52cc4872015-05-07 19:34:16 +0000150
151// Test that we've built correct kernel launch sequence.
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000152// ALL: define{{.*}}hostfunc
Artem Belevichc62214d2019-01-31 21:34:03 +0000153// CUDA-OLD: call{{.*}}[[PREFIX]]ConfigureCall
154// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
155// HIP: call{{.*}}[[PREFIX]]ConfigureCall
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000156// ALL: call{{.*}}kernelfunc
Artem Belevich52cc4872015-05-07 19:34:16 +0000157void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000158#endif
Artem Belevich52cc4872015-05-07 19:34:16 +0000159
Artem Belevich42e19492016-03-02 18:28:50 +0000160// Test that we've built a function to register kernels and global vars.
Yaxun Liu887c5692018-04-25 01:10:37 +0000161// ALL: define internal void @__[[PREFIX]]_register_globals
162// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
163// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
164// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
Artem Belevich99535772018-12-22 01:11:09 +0000165// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0
166// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000167// ALL: ret void
Artem Belevich52cc4872015-05-07 19:34:16 +0000168
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000169// Test that we've built a constructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000170// ALL: define internal void @__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000171
Yaxun Liu887c5692018-04-25 01:10:37 +0000172// In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
Yaxun Liuf99752b2018-07-20 22:45:24 +0000173// HIP only register fat binary once.
174// HIP: load i8**, i8*** @__hip_gpubin_handle
175// HIP-NEXT: icmp eq i8** {{.*}}, null
176// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
177// HIP: if:
Yaxun Liu887c5692018-04-25 01:10:37 +0000178// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
179// .. stores return value in __[[PREFIX]]_gpubin_handle
180// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
181// .. and then calls __[[PREFIX]]_register_globals
Yaxun Liuf99752b2018-07-20 22:45:24 +0000182// HIP-NEXT: br label %exit
183// HIP: exit:
184// HIP-NEXT: load i8**, i8*** @__hip_gpubin_handle
Yaxun Liu887c5692018-04-25 01:10:37 +0000185// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
Artem Belevichc66d2542018-06-27 18:32:51 +0000186// * In separate mode we also register a destructor.
187// NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000188
Yaxun Liu887c5692018-04-25 01:10:37 +0000189// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
190// RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
191// RDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000192// RDC-SAME: [[MODULE_ID_GLOBAL]]
Artem Belevich52cc4872015-05-07 19:34:16 +0000193
194// Test that we've created destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000195// NORDC: define internal void @__[[PREFIX]]_module_dtor
196// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
Yaxun Liuf99752b2018-07-20 22:45:24 +0000197// CUDANORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
198// HIP-NEXT: icmp ne i8** {{.*}}, null
199// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
200// HIP: if:
201// HIP-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
202// HIP-NEXT: store i8** null, i8*** @__hip_gpubin_handle
203// HIP-NEXT: br label %exit
204// HIP: exit:
Artem Belevich52cc4872015-05-07 19:34:16 +0000205
Yaxun Liu887c5692018-04-25 01:10:37 +0000206// There should be no __[[PREFIX]]_register_globals if we have no
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000207// device-side globals, but we still need to register GPU binary.
208// Skip GPU binary string first.
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +0000209// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
Yaxun Liu97670892018-10-02 17:48:54 +0000210// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
Yaxun Liu887c5692018-04-25 01:10:37 +0000211// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
Yaxun Liu29155b02018-05-18 15:07:56 +0000212// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
Yaxun Liu887c5692018-04-25 01:10:37 +0000213// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
214// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
215// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
216// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000217
218// There should be no constructors/destructors if we have no GPU binary.
Yaxun Liu887c5692018-04-25 01:10:37 +0000219// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
220// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor
221// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor