Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 1 | ; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 2 | |
; Expected output globals: one `.runtime_handle` global in addrspace(1) per
; enqueued-block kernel. The two unnamed kernels (@0 and @1 in the input) are
; expected under the generated names __amdgpu_enqueued_kernel and
; __amdgpu_enqueued_kernel.1.
Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame^] | 3 | ; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null |
| 4 | ; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null |
| 5 | ; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null |
| 6 | ; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global i8 addrspace(1)* null |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 7 | |
; Types used by the __enqueue_kernel_basic declaration and its call sites:
; the ndrange struct is passed byval, the queue type is opaque.
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 8 | %struct.ndrange_t = type { i32 } |
| 9 | %opencl.queue_t = type opaque |
| 10 | |
; Kernel whose body is only `ret void`: it neither enqueues a block nor calls
; anything. The negative directive below requires that no attribute-group
; reference (#N) shows up on it in the output.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 11 | ; CHECK-LABEL: define amdgpu_kernel void @non_caller |
| 12 | ; CHECK-NOT: #{{[0-9]+}} |
Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 13 | define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr |
| 14 | !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { |
| 15 | ret void |
| 16 | } |
| 17 | |
; Kernel that does not enqueue a block itself but calls @caller, which does.
; The directives capture the attribute group placed on this definition as
; AT_CALLER; later directives require @caller to use the same group and that
; group to be { "calls-enqueue-kernel" }.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 18 | ; CHECK-LABEL: define amdgpu_kernel void @caller_indirect |
| 19 | ; CHECK-SAME: #[[AT_CALLER:[0-9]+]] |
Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 20 | define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr |
| 21 | !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { |
| 22 | call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) |
| 23 | ret void |
| 24 | } |
| 25 | |
; Kernel that calls __enqueue_kernel_basic five times: twice with
; @__test_block_invoke_kernel, once each with the unnamed kernels @0 and @1,
; and once with @__test_block_invoke_2_kernel.
; Expectations encoded below:
;   * the definition carries the AT_CALLER attribute group;
;   * between the header and the first matched call there is no reference to
;     the kernel symbols themselves (named, generated-name, or @0/@1);
;   * each call, in order, mentions the matching `.runtime_handle` global.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 26 | ; CHECK-LABEL: define amdgpu_kernel void @caller |
| 27 | ; CHECK-SAME: #[[AT_CALLER]] |
| 28 | ; CHECK-NOT: @__test_block_invoke_kernel |
| 29 | ; CHECK-NOT: @__test_block_invoke_2_kernel |
| 30 | ; CHECK-NOT: @__amdgpu_enqueued_kernel |
| 31 | ; CHECK-NOT: @__amdgpu_enqueued_kernel.1 |
| 32 | ; CHECK-NOT: @0 |
| 33 | ; CHECK-NOT: @1 |
| 34 | ; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle |
| 35 | ; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle |
| 36 | ; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle |
| 37 | ; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle |
| 38 | ; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle |
Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 39 | define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 40 | !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { |
| 41 | entry: |
; Two block literals and two ndrange structs, all allocated in addrspace(5).
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 42 | %block = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) |
| 43 | %tmp = alloca %struct.ndrange_t, align 4, addrspace(5) |
| 44 | %block2 = alloca <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8, addrspace(5) |
| 45 | %tmp3 = alloca %struct.ndrange_t, align 4, addrspace(5) |
; Populate %block: field 0 (block.size) = 25, field 1 (block.align) = 8,
; field 2 = %a, field 3 = %b.
| 46 | %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 0 |
| 47 | store i32 25, i32 addrspace(5)* %block.size, align 8 |
| 48 | %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 1 |
| 49 | store i32 8, i32 addrspace(5)* %block.align, align 4 |
| 50 | %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 2 |
| 51 | store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured, align 8 |
| 52 | %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 3 |
| 53 | store i8 %b, i8 addrspace(5)* %block.captured1, align 8 |
; Cast the addrspace(5) block pointer to a plain i8* (default address space);
; %tmp4 is passed as the last argument of the next four calls.
| 54 | %tmp1 = bitcast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block to void () addrspace(5)* |
| 55 | %tmp4 = addrspacecast void () addrspace(5)* %tmp1 to i8* |
; Same named kernel enqueued twice, so two call sites are expected to refer to
; the same runtime-handle global.
| 56 | %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, |
| 57 | i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2 |
| 58 | %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, |
| 59 | i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2 |
; Unnamed kernels @0 and @1 as the invoke function.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 60 | %tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, |
| 61 | i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2 |
| 62 | %tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, |
| 63 | i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2 |
; Populate %block2: field 0 = 41, field 1 = 8, field 2 = %a, field 5 = %b,
; field 3 = %c, field 4 = %d (in that store order).
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 64 | %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0 |
| 65 | store i32 41, i32 addrspace(5)* %block.size4, align 8 |
| 66 | %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1 |
| 67 | store i32 8, i32 addrspace(5)* %block.align5, align 4 |
| 68 | %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 2 |
| 69 | store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured7, align 8 |
| 70 | %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 5 |
| 71 | store i8 %b, i8 addrspace(5)* %block.captured8, align 8 |
| 72 | %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 3 |
| 73 | store i64 addrspace(1)* %c, i64 addrspace(1)* addrspace(5)* %block.captured9, align 8 |
| 74 | %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 4 |
| 75 | store i64 %d, i64 addrspace(5)* %block.captured10, align 8 |
; Second block literal is cast the same way and enqueued with the second
; ndrange struct (%tmp3) and @__test_block_invoke_2_kernel.
| 76 | %tmp6 = bitcast <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2 to void () addrspace(5)* |
| 77 | %tmp8 = addrspacecast void () addrspace(5)* %tmp6 to i8* |
| 78 | %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp3, |
| 79 | i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*), i8* nonnull %tmp8) #2 |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 80 | ret void |
| 81 | } |
| 82 | |
; In this kernel the enqueued-block kernel symbol is used by a load through a
; constant expression instead of appearing as a call argument. The directives
; expect the AT_CALLER attribute group on the definition and expect the load
; to read from the `.runtime_handle` global (bitcast to i64 addrspace(1)*),
; with no earlier reference to the kernel symbol.
Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame^] | 83 | ; __enqueue_kernel* functions may get inlined |
| 84 | ; CHECK-LABEL: define amdgpu_kernel void @inlined_caller |
| 85 | ; CHECK-SAME: #[[AT_CALLER]] |
| 86 | ; CHECK-NOT: @__test_block_invoke_kernel |
| 87 | ; CHECK: load i64, i64 addrspace(1)* bitcast (i8 addrspace(1)* addrspace(1)* @__test_block_invoke_kernel.runtime_handle to i64 addrspace(1)*) |
| 88 | define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr |
| 89 | !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { |
| 90 | entry: |
; Load an i64 via the kernel symbol's address and store it through %c so the
; use stays live.
| 91 | %tmp = load i64, i64 addrspace(1)* addrspacecast (i64* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i64*) to i64 addrspace(1)*) |
| 92 | store i64 %tmp, i64 addrspace(1)* %c |
| 93 | ret void |
| 94 | } |
| 95 | |
; Enqueued-block kernel (attribute group #0). It is `internal` in the input;
; the directives expect the output definition to read
; `define dso_local amdgpu_kernel` and to carry an attribute group (captured
; as AT1) that a later directive ties to the matching "runtime-handle" value.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 96 | ; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel |
| 97 | ; CHECK-SAME: #[[AT1:[0-9]+]] |
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 98 | define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 99 | !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { |
| 100 | entry: |
; Store the captured i8 (field 3) through the captured pointer (field 2).
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 101 | %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 2 |
| 102 | %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 3 |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 103 | store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1 |
| 104 | ret void |
| 105 | } |
| 106 | |
; External enqueue entry point called from @caller. Parameters, as used at the
; call sites in this file: queue pointer (undef there), an i32 (always 0
; there), ndrange struct pointer in addrspace(5) (passed byval), the kernel
; bitcast to i8*, and the block literal as i8*.
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 107 | declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 108 | |
; Second named enqueued-block kernel (attribute group #0), taking the
; six-field block literal. Expected in the output as
; `define dso_local amdgpu_kernel` with an attribute group captured as AT2.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 109 | ; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel |
| 110 | ; CHECK-SAME: #[[AT2:[0-9]+]] |
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 111 | define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 112 | i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 |
| 113 | !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { |
| 114 | entry: |
; Extract fields 2..5, then store the i8 (field 5) through the i8 pointer
; (field 2) and the i64 (field 4) through the i64 pointer (field 3).
Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 115 | %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 2 |
| 116 | %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3 |
| 117 | %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4 |
| 118 | %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5 |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 119 | store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1 |
| 120 | store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8 |
| 121 | ret void |
| 122 | } |
| 123 | |
; First unnamed enqueued-block kernel (@0, attribute group #0) with an empty
; body. The directives expect it in the output under the generated name
; __amdgpu_enqueued_kernel, as dso_local, with an attribute group captured as
; AT3.
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 124 | ; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel |
| 125 | ; CHECK-SAME: #[[AT3:[0-9]+]] |
| 126 | define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 |
| 127 | !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { |
| 128 | ret void |
| 129 | } |
| 130 | |
; Second unnamed enqueued-block kernel (@1, attribute group #0) with an empty
; body. The directives expect it in the output under the generated name
; __amdgpu_enqueued_kernel.1, as dso_local, with an attribute group captured
; as AT4.
| 131 | ; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1 |
| 132 | ; CHECK-SAME: #[[AT4:[0-9]+]] |
| 133 | define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 |
| 134 | !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { |
| 135 | ret void |
| 136 | } |
| 137 | |
; Expected attribute groups in the output, resolved through the variables
; captured on the definitions above: the group shared by the enqueuing kernels
; must be exactly { "calls-enqueue-kernel" }, and each enqueued-block kernel's
; group must contain a "runtime-handle" string attribute naming its global.
Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 138 | ; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" } |
Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 139 | ; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle" |
| 140 | ; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle" |
| 141 | ; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle" |
| 142 | ; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle" |
Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 143 | |
; Input attribute group #0 marks the four block-invoke kernels as enqueued
; blocks.
; NOTE(review): the call sites in @caller reference attribute group #2, which
; has no definition in the visible part of this file - confirm this is
; intentional.
| 144 | attributes #0 = { "enqueued-block" } |
| 145 | |
; !3..!6: kernel-argument metadata (address space, access qualifier, type,
; type qualifier) attached to the four-argument kernels.
| 146 | !3 = !{i32 1, i32 0, i32 1, i32 0} |
| 147 | !4 = !{!"none", !"none", !"none", !"none"} |
| 148 | !5 = !{!"char*", !"char", !"long*", !"long"} |
| 149 | !6 = !{!"", !"", !"", !""} |
; !14..!17: the same four kinds of metadata for the single-argument
; block-invoke kernels.
| 150 | !14 = !{i32 0} |
| 151 | !15 = !{!"none"} |
| 152 | !16 = !{!"__block_literal"} |
| 153 | !17 = !{!""} |