Artem Belevich | 52cc487 | 2015-05-07 19:34:16 +0000 | [diff] [blame^] | 1 | // RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s |
Peter Collingbourne | fa4d603 | 2011-10-06 18:51:56 +0000 | [diff] [blame] | 2 | |
Eli Bendersky | 3468d9d | 2014-04-28 22:21:28 +0000 | [diff] [blame] | 3 | #include "Inputs/cuda.h" |
Peter Collingbourne | fa4d603 | 2011-10-06 18:51:56 +0000 | [diff] [blame] | 4 | |
Artem Belevich | 52cc487 | 2015-05-07 19:34:16 +0000 | [diff] [blame^] | 5 | // Make sure that all parts of GPU code init/cleanup are there: |
| 6 | // * constant unnamed string with the kernel name |
| 7 | // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00", align 1 |
| 8 | // * constant unnamed string with GPU binary |
| 9 | // CHECK: private unnamed_addr constant{{.*}}\00" |
| 10 | // * constant struct that wraps GPU binary |
| 11 | // CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } |
| 12 | // CHECK: { i32 1180844977, i32 1, {{.*}}, i64 0, i64 0), i8* null } |
| 13 | // * variable to save GPU binary handle after initialization |
| 14 | // CHECK: @__cuda_gpubin_handle = internal global i8** null |
| 15 | // * Make sure our constructor/destructor was added to global ctor/dtor list. |
| 16 | // CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor |
| 17 | // CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor |
| 18 | |
Peter Collingbourne | fa4d603 | 2011-10-06 18:51:56 +0000 | [diff] [blame] | 19 | // Test that we build the correct number of calls to cudaSetupArgument followed |
| 20 | // by a call to cudaLaunch. |
| 21 | |
| 22 | // CHECK: define{{.*}}kernelfunc |
| 23 | // CHECK: call{{.*}}cudaSetupArgument |
| 24 | // CHECK: call{{.*}}cudaSetupArgument |
| 25 | // CHECK: call{{.*}}cudaSetupArgument |
| 26 | // CHECK: call{{.*}}cudaLaunch |
| 27 | /* Kernel with an empty body and three int parameters; the checks above expect three cudaSetupArgument calls (matching the three parameters) followed by cudaLaunch. */ __global__ void kernelfunc(int i, int j, int k) {} |
Artem Belevich | 52cc487 | 2015-05-07 19:34:16 +0000 | [diff] [blame^] | 28 | |
| 29 | // Test that we've built the correct kernel launch sequence: hostfunc must call cudaConfigureCall and then call kernelfunc. |
| 30 | // CHECK: define{{.*}}hostfunc |
| 31 | // CHECK: call{{.*}}cudaConfigureCall |
| 32 | // CHECK: call{{.*}}kernelfunc |
| 33 | void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } |
| 34 | |
| 35 | // Test that we've built a function to register kernels |
| 36 | // CHECK: define internal void @__cuda_register_kernels |
| 37 | // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc |
| 38 | |
| 39 | // Test that we've built a constructor.. |
| 40 | // CHECK: define internal void @__cuda_module_ctor |
| 41 | // .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper) |
| 42 | // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper |
| 43 | // .. stores return value in __cuda_gpubin_handle |
| 44 | // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle |
| 45 | // .. and then calls __cuda_register_kernels |
| 46 | // CHECK-NEXT: call void @__cuda_register_kernels |
| 47 | |
| 48 | // Test that we've created destructor. |
| 49 | // CHECK: define internal void @__cuda_module_dtor |
| 50 | // CHECK: load{{.*}}__cuda_gpubin_handle |
| 51 | // CHECK-NEXT: call void @__cudaUnregisterFatBinary |
| 52 | |