blob: 9db5738cdedec896166c69b7a66791f4e79eb0bf [file] [log] [blame]
Artem Belevich36090852016-03-02 21:03:20 +00001// RUN: echo "GPU binary would be here" > %t
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00002// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +00003// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
4// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
Yaxun Liuc18e9ec2019-02-14 02:00:09 +00005// RUN: --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-OLD
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +00006// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +00007// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t \
8// RUN: -o - -DNOGLOBALS \
9// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
10// RUN: -check-prefixes=NOGLOBALS,CUDANOGLOBALS
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000011// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Artem Belevichc62214d2019-01-31 21:34:03 +000012// RUN: -target-sdk-version=8.0 -fgpu-rdc -fcuda-include-gpubinary %t \
13// RUN: -o - \
14// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000015// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-OLD
Artem Belevichc62214d2019-01-31 21:34:03 +000016// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
17// RUN: -target-sdk-version=8.0 -o - \
18// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
19
20// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
21// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
22// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000023// RUN: --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-NEW
Artem Belevichc62214d2019-01-31 21:34:03 +000024// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
25// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
26// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
27// RUN: --check-prefixes=NOGLOBALS,CUDANOGLOBALS
28// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
29// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
30// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000031// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW
Artem Belevichc62214d2019-01-31 21:34:03 +000032// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
33// RUN: -target-sdk-version=9.2 -o - \
Joel E. Denny72c27832018-07-11 20:26:20 +000034// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
Peter Collingbournefa4d6032011-10-06 18:51:56 +000035
Yaxun Liu887c5692018-04-25 01:10:37 +000036// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
37// RUN: -fcuda-include-gpubinary %t -o - -x hip\
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000038// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC,HIP,HIPEF
Yaxun Liu887c5692018-04-25 01:10:37 +000039// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
40// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
Joel E. Denny72c27832018-07-11 20:26:20 +000041// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
Yaxun Liu887c5692018-04-25 01:10:37 +000042// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
Yaxun Liu97670892018-10-02 17:48:54 +000043// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000044// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC,HIP,HIPEF
Yaxun Liu887c5692018-04-25 01:10:37 +000045// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000046// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,LNX,NORDC,HIP,HIPNEF
47
48// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \
49// RUN: -fcuda-include-gpubinary %t -o - -x hip\
50// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN
Yaxun Liu887c5692018-04-25 01:10:37 +000051
Eli Bendersky3468d9d2014-04-28 22:21:28 +000052#include "Inputs/cuda.h"
Peter Collingbournefa4d6032011-10-06 18:51:56 +000053
Artem Belevich8c1ec1e2016-03-02 18:28:53 +000054#ifndef NOGLOBALS
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000055// LNX-DAG: @device_var = internal global i32
56// WIN-DAG: @"?device_var@@3HA" = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000057__device__ int device_var;
58
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000059// LNX-DAG: @constant_var = internal global i32
60// WIN-DAG: @"?constant_var@@3HA" = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000061__constant__ int constant_var;
62
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000063// LNX-DAG: @shared_var = internal global i32
64// WIN-DAG: @"?shared_var@@3HA" = internal global i32
Artem Belevich42e19492016-03-02 18:28:50 +000065__shared__ int shared_var;
66
67// Make sure host globals don't get internalized...
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000068// LNX-DAG: @host_var = global i32
69// WIN-DAG: @"?host_var@@3HA" = dso_local global i32
Artem Belevich42e19492016-03-02 18:28:50 +000070int host_var;
71// ... and that extern vars remain external.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000072// LNX-DAG: @ext_host_var = external global i32
73// WIN-DAG: @"?ext_host_var@@3HA" = external dso_local global i32
Artem Belevich42e19492016-03-02 18:28:50 +000074extern int ext_host_var;
75
Artem Belevich99535772018-12-22 01:11:09 +000076// external device-side variables -> extern references to their shadows.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000077// LNX-DAG: @ext_device_var = external global i32
78// WIN-DAG: @"?ext_device_var@@3HA" = external dso_local global i32
Artem Belevich42e19492016-03-02 18:28:50 +000079extern __device__ int ext_device_var;
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000080// LNX-DAG: @ext_device_var = external global i32
81// WIN-DAG: @"?ext_constant_var@@3HA" = external dso_local global i32
Artem Belevich42e19492016-03-02 18:28:50 +000082extern __constant__ int ext_constant_var;
83
Artem Belevich99535772018-12-22 01:11:09 +000084// external device-side variables with definitions should generate
85// definitions for the shadows.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000086// LNX-DAG: @ext_device_var_def = internal global i32 undef,
87// WIN-DAG: @"?ext_device_var_def@@3HA" = internal global i32 undef
Artem Belevich99535772018-12-22 01:11:09 +000088extern __device__ int ext_device_var_def;
89__device__ int ext_device_var_def = 1;
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000090// LNX-DAG: @ext_device_var_def = internal global i32 undef,
91// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
Artem Belevich99535772018-12-22 01:11:09 +000092__constant__ int ext_constant_var_def = 2;
93
Yaxun Liuc18e9ec2019-02-14 02:00:09 +000094
Artem Belevich42e19492016-03-02 18:28:50 +000095void use_pointers() {
96 int *p;
97 p = &device_var;
98 p = &constant_var;
99 p = &shared_var;
100 p = &host_var;
101 p = &ext_device_var;
102 p = &ext_constant_var;
103 p = &ext_host_var;
104}
105
Artem Belevich52cc4872015-05-07 19:34:16 +0000106// Make sure that all parts of GPU code init/cleanup are there:
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000107// * constant unnamed string with the device-side kernel name to be passed to
108// __hipRegisterFunction/__cudaRegisterFunction.
109// ALL: @0 = private unnamed_addr constant [18 x i8] c"_Z10kernelfunciii\00"
110// * constant unnamed string with the device-side kernel name to be passed to
111// __hipRegisterVar/__cudaRegisterVar.
112// ALL: @1 = private unnamed_addr constant [11 x i8] c"device_var\00"
113// ALL: @2 = private unnamed_addr constant [13 x i8] c"constant_var\00"
114// ALL: @3 = private unnamed_addr constant [19 x i8] c"ext_device_var_def\00"
115// ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00"
Artem Belevich52cc4872015-05-07 19:34:16 +0000116// * constant unnamed string with GPU binary
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +0000117// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
Yaxun Liu97670892018-10-02 17:48:54 +0000118// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
119// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
Yaxun Liu29155b02018-05-18 15:07:56 +0000120// CUDANORDC-SAME: section ".nv_fatbin", align 8
121// CUDARDC-SAME: section "__nv_relfatbin", align 8
Artem Belevich52cc4872015-05-07 19:34:16 +0000122// * constant struct that wraps GPU binary
Yaxun Liu29155b02018-05-18 15:07:56 +0000123// ALL: @__[[PREFIX:cuda|hip]]_fatbin_wrapper = internal constant
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000124// LNX-SAME: { i32, i32, i8*, i8* }
Yaxun Liu29155b02018-05-18 15:07:56 +0000125// CUDA-SAME: { i32 1180844977, i32 1,
126// HIP-SAME: { i32 1212764230, i32 1,
127// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
Yaxun Liu97670892018-10-02 17:48:54 +0000128// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
129// HIPNEF-SAME: i8* @[[FATBIN]],
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000130// LNX-SAME: i8* null }
Yaxun Liu29155b02018-05-18 15:07:56 +0000131// CUDA-SAME: section ".nvFatBinSegment"
132// HIP-SAME: section ".hipFatBinSegment"
Artem Belevich52cc4872015-05-07 19:34:16 +0000133// * variable to save GPU binary handle after initialization
Yaxun Liuf99752b2018-07-20 22:45:24 +0000134// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
Yaxun Liu97670892018-10-02 17:48:54 +0000135// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000136// * constant unnamed string with NVModuleID
Jonas Hahnfeld3b9cbba92018-06-08 11:17:08 +0000137// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
Yaxun Liu29155b02018-05-18 15:07:56 +0000138// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000139// * Make sure our constructor was added to global ctor list.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000140// LNX: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000141// * Alias to global symbol containing the NVModuleID.
142// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
Yaxun Liu887c5692018-04-25 01:10:37 +0000143// RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
Artem Belevich52cc4872015-05-07 19:34:16 +0000144
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000145// Test that we build the correct number of calls to cudaSetupArgument followed
146// by a call to cudaLaunch.
147
Yaxun Liu00ebc0c2019-02-22 04:20:12 +0000148// LNX: define{{.*}}kernelfunc
Artem Belevichc62214d2019-01-31 21:34:03 +0000149
150// New launch sequence stores arguments into local buffer and passes array of
151// pointers to them directly to cudaLaunchKernel
152// CUDA-NEW: alloca
153// CUDA-NEW: store
154// CUDA-NEW: store
155// CUDA-NEW: store
156// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
157// CUDA-NEW: call{{.*}}cudaLaunchKernel
158
159// Legacy style launch sequence sets up arguments by passing them to
160// [cuda|hip]SetupArgument.
161// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
162// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
163// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
164// CUDA-OLD: call{{.*}}[[PREFIX]]Launch
165
166// HIP: call{{.*}}[[PREFIX]]SetupArgument
167// HIP: call{{.*}}[[PREFIX]]SetupArgument
168// HIP: call{{.*}}[[PREFIX]]SetupArgument
169// HIP: call{{.*}}[[PREFIX]]Launch
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000170__global__ void kernelfunc(int i, int j, int k) {}
Artem Belevich52cc4872015-05-07 19:34:16 +0000171
172// Test that we've built correct kernel launch sequence.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000173// LNX: define{{.*}}hostfunc
Artem Belevichc62214d2019-01-31 21:34:03 +0000174// CUDA-OLD: call{{.*}}[[PREFIX]]ConfigureCall
175// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
176// HIP: call{{.*}}[[PREFIX]]ConfigureCall
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000177// LNX: call{{.*}}kernelfunc
Artem Belevich52cc4872015-05-07 19:34:16 +0000178void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000179#endif
Artem Belevich52cc4872015-05-07 19:34:16 +0000180
Artem Belevich42e19492016-03-02 18:28:50 +0000181// Test that we've built a function to register kernels and global vars.
Yaxun Liu887c5692018-04-25 01:10:37 +0000182// ALL: define internal void @__[[PREFIX]]_register_globals
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000183// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc{{[^,]*}}, {{[^@]*}}@0
184// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, i32 4, i32 0, i32 0
185// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, i32 4, i32 1, i32 0
186// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, i32 4, i32 0, i32 0
187// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, i32 4, i32 1, i32 0
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000188// ALL: ret void
Artem Belevich52cc4872015-05-07 19:34:16 +0000189
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000190// Test that we've built a constructor.
Yaxun Liuc18e9ec2019-02-14 02:00:09 +0000191// LNX: define internal void @__[[PREFIX]]_module_ctor
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000192
Yaxun Liu887c5692018-04-25 01:10:37 +0000193// In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
Yaxun Liuf99752b2018-07-20 22:45:24 +0000194// HIP only register fat binary once.
195// HIP: load i8**, i8*** @__hip_gpubin_handle
196// HIP-NEXT: icmp eq i8** {{.*}}, null
197// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
198// HIP: if:
Yaxun Liu887c5692018-04-25 01:10:37 +0000199// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
200// .. stores return value in __[[PREFIX]]_gpubin_handle
201// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
202// .. and then calls __[[PREFIX]]_register_globals
Yaxun Liuf99752b2018-07-20 22:45:24 +0000203// HIP-NEXT: br label %exit
204// HIP: exit:
205// HIP-NEXT: load i8**, i8*** @__hip_gpubin_handle
Yaxun Liu887c5692018-04-25 01:10:37 +0000206// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
Artem Belevichc66d2542018-06-27 18:32:51 +0000207// * In separate mode we also register a destructor.
208// NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000209
Yaxun Liu887c5692018-04-25 01:10:37 +0000210// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
211// RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
212// RDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000213// RDC-SAME: [[MODULE_ID_GLOBAL]]
Artem Belevich52cc4872015-05-07 19:34:16 +0000214
215// Test that we've created destructor.
Yaxun Liu887c5692018-04-25 01:10:37 +0000216// NORDC: define internal void @__[[PREFIX]]_module_dtor
217// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
Yaxun Liuf99752b2018-07-20 22:45:24 +0000218// CUDANORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
219// HIP-NEXT: icmp ne i8** {{.*}}, null
220// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
221// HIP: if:
222// HIP-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
223// HIP-NEXT: store i8** null, i8*** @__hip_gpubin_handle
224// HIP-NEXT: br label %exit
225// HIP: exit:
Artem Belevich52cc4872015-05-07 19:34:16 +0000226
Yaxun Liu887c5692018-04-25 01:10:37 +0000227// There should be no __[[PREFIX]]_register_globals if we have no
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000228// device-side globals, but we still need to register GPU binary.
229// Skip GPU binary string first.
Aaron Enye Shi81295212019-04-02 20:49:41 +0000230// CUDANOGLOBALS-NOT: @{{.*}} = private constant{{.*}}
Aaron Enye Shi13d8e922019-04-02 20:10:18 +0000231// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}}
Yaxun Liu887c5692018-04-25 01:10:37 +0000232// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
Aaron Enye Shi81295212019-04-02 20:49:41 +0000233// NOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
234// NOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
Yaxun Liu887c5692018-04-25 01:10:37 +0000235// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
Aaron Enye Shi81295212019-04-02 20:49:41 +0000236// NOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
237// NOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000238
239// There should be no constructors/destructors if we have no GPU binary.
Yaxun Liu887c5692018-04-25 01:10:37 +0000240// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
241// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor
242// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor