// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s

#include "Inputs/cuda.h"

// Launch-bound values passed to __launch_bounds__ by the kernels in this test;
// the CHECK lines expect these literal values (or sums of them) in the output.
#define MAX_THREADS_PER_BLOCK 256
#define MIN_BLOCKS_PER_MP 2

// Both launch-bound arguments are supplied: expect one annotation for the max
// threads per block (maxntidx) and one for the min CTAs per SM (minctasm).
extern "C" {
__global__ void __launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP)
Kernel1() {}
}

// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"minctasm", i32 2}

// Test only max threads per block. Min CTAs per SM defaults to 0, and
// CodeGen doesn't output a zero value for minctasm.
extern "C" {
__global__ void __launch_bounds__(MAX_THREADS_PER_BLOCK) Kernel2() {}
}

// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
// Verify the claim above explicitly: no minctasm annotation may follow for
// Kernel2 before the next kernel's annotations.
// CHECK-NOT: !{{[0-9]+}} = !{void ()* @Kernel2, !"minctasm",

// The max-threads bound is taken from a template parameter, so it is only
// known once the kernel is explicitly instantiated below.
template <int MaxThreads>
__global__ void __launch_bounds__(MaxThreads) Kernel3() {}

template __global__ void Kernel3<MAX_THREADS_PER_BLOCK>();
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}

// Both bounds are taken from template parameters; the explicit instantiation
// below supplies the concrete values.
template <int MaxThreads, int MinBlocks>
__global__ void __launch_bounds__(MaxThreads, MinBlocks) Kernel4() {}
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();

// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}

const int constint = 100;
// Each bound is an arithmetic expression over template parameters and a
// file-scope constant. With the instantiation below the expected values are
// 256 + 100 = 356 (maxntidx) and 2 + 256 = 258 (minctasm).
template <int MaxThreads, int MinBlocks>
__global__ void __launch_bounds__(MaxThreads + constint, MinBlocks + MaxThreads)
Kernel5() {}
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();

// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}

// Make sure we don't emit negative launch bounds values: a negative max
// threads per block must produce no maxntidx entry, while the valid min
// blocks value is still emitted as minctasm.
__global__ void __launch_bounds__(-MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP)
Kernel6() {}
// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx",
// CHECK:     !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",

// Mirror case: a negative min blocks value must produce no minctasm entry,
// while the valid max threads per block is still emitted as maxntidx.
__global__ void __launch_bounds__(MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP)
Kernel7() {}
// CHECK:     !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx",
// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm",

// Bounds given as const variables rather than literals; the second one is a
// const char, and the expected annotation values are 100 and 12.
const char constchar = 12;
__global__ void
__launch_bounds__(constint, constchar)
Kernel8() {}
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12