; RUN: opt -amdgpu-lower-enqueued-block -S < %s | FileCheck %s

; Runs the amdgpu-lower-enqueued-block pass over this module and verifies the
; textual output. The two directives below expect one external
; "<kernel>_runtime_handle" global in addrspace(1) for each of the two block
; invoke kernels defined in this file.

; CHECK: @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__test_block_invoke_2_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*

; Per this layout string, pointers in address spaces 1, 2 and 4 are 64-bit,
; while the default address space and address spaces 3 and 5 are 32-bit.
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn-amdhsa-amd-opencl"

; Types that appear in the signature of @__enqueue_kernel_basic.
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; @non_caller neither enqueues a block nor calls another function, so the
; expected definition carries no attribute group: the kernel-argument metadata
; follows local_unnamed_addr directly.
; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  ret void
}

; @caller_indirect does not call @__enqueue_kernel_basic itself; it only
; forwards its four arguments to @caller. The expected output still attaches
; an attribute group to it, captured here as AT_CALLER and reused for @caller.
; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
  ret void
}

; @caller builds two block literals on the stack and hands each one to
; @__enqueue_kernel_basic. The expected definition must use the same attribute
; group number that was captured as AT_CALLER on @caller_indirect.
;
; Both literals are packed structs whose first three fields are
;   field 0: i32 (25 resp. 41 is stored into it),
;   field 1: i32 (8 is stored into it),
;   field 2: i8 addrspace(4)* holding the address of the block invoke kernel,
; followed by the values captured from the kernel arguments.
; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  ; Stack slots: one block literal and one ndrange_t per enqueue call.
  ; Nothing is ever stored into %tmp or %tmp3 before they are passed byval.
  %block = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
  %tmp = alloca %struct.ndrange_t, align 4
  %block2 = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8
  %tmp3 = alloca %struct.ndrange_t, align 4

  ; First literal: captures %a and %b.
  ; 25 equals the packed size 4 + 4 + 8 + 8 + 1 with 64-bit addrspace(1) and
  ; addrspace(4) pointers, as given by the module datalayout.
  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 0
  store i32 25, i32* %block.size, align 8
  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 1
  store i32 8, i32* %block.align, align 4
  %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 2
  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 8
  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 3
  store i8 addrspace(1)* %a, i8 addrspace(1)** %block.captured, align 8
  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 4
  store i8 %b, i8* %block.captured1, align 8

  ; The literal's address is cast to i8 addrspace(4)* and enqueued.
  ; NOTE(review): attribute group #2 is referenced on both enqueue calls, but
  ; no definition of it is visible in this file - confirm this is intended.
  %tmp1 = bitcast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block to void ()*
  %tmp2 = bitcast void ()* %tmp1 to i8*
  %tmp4 = addrspacecast i8* %tmp2 to i8 addrspace(4)*
  %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t* byval nonnull %tmp, i8 addrspace(4)* nonnull %tmp4) #2

  ; Second literal: captures %a, %c, %d and %b (fields 3, 4, 5 and 6).
  ; 41 equals the packed size 4 + 4 + 8 + 8 + 8 + 8 + 1.
  %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 0
  store i32 41, i32* %block.size4, align 8
  %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 1
  store i32 8, i32* %block.align5, align 4
  %block.invoke6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 2
  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke6, align 8
  %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 3
  store i8 addrspace(1)* %a, i8 addrspace(1)** %block.captured7, align 8
  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 6
  store i8 %b, i8* %block.captured8, align 8
  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 4
  store i64 addrspace(1)* %c, i64 addrspace(1)** %block.captured9, align 8
  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 5
  store i64 %d, i64* %block.captured10, align 8

  ; Same cast-and-enqueue sequence for the second literal.
  %tmp6 = bitcast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2 to void ()*
  %tmp7 = bitcast void ()* %tmp6 to i8*
  %tmp8 = addrspacecast i8* %tmp7 to i8 addrspace(4)*
  %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t* byval nonnull %tmp3, i8 addrspace(4)* nonnull %tmp8) #2
  ret void
}

; Block invoke kernel for the first literal built in @caller. It receives the
; literal by value and writes the captured i8 (field 4) through the captured
; addrspace(1) pointer (field 3).
; The input definition is `internal` and tagged with attribute group #0; the
; expected output line has no `internal` keyword and captures the resulting
; attribute group number as AT1.
; CHECK: define amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg, 3
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg, 4
  store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  ret void
}

; Enqueue entry point called twice from @caller; it is only declared in this
; file. Arguments: queue pointer, an i32, an ndrange_t pointer, and the block
; literal as an i8 addrspace(4)*.
declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t*, i8 addrspace(4)*) local_unnamed_addr

; Block invoke kernel for the second literal built in @caller. It receives the
; literal by value and performs two stores:
;   - the captured i8 (field 6) through the captured i8 pointer (field 3),
;   - the captured i64 (field 5) through the captured i64 pointer (field 4).
; The input definition is `internal` and tagged with attribute group #0; the
; expected output line has no `internal` keyword and captures the resulting
; attribute group number as AT2.
; CHECK: define amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*,
    i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
    !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4
  %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5
  %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 6
  store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8
  ret void
}

; Expected attribute groups in the output: the group shared by @caller and
; @caller_indirect carries "calls-enqueue-kernel", and each block invoke
; kernel's group names that kernel's runtime-handle global.
; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"

; Input attribute group attached to both block invoke kernels.
attributes #0 = { "enqueued-block" }

; !3 - !6: kernel-argument metadata for the four-argument kernels
; (@non_caller, @caller_indirect, @caller), one entry per argument.
!3 = !{i32 1, i32 0, i32 1, i32 0}
!4 = !{!"none", !"none", !"none", !"none"}
!5 = !{!"char*", !"char", !"long*", !"long"}
!6 = !{!"", !"", !"", !""}
; !14 - !17: kernel-argument metadata for the block invoke kernels, which
; take a single block-literal argument.
!14 = !{i32 0}
!15 = !{!"none"}
!16 = !{!"__block_literal"}
!17 = !{!""}