; Source blob: a09c31784fe8acdb24f1522fbdf184aac38e6d75
;
; Regression test for the -amdgpu-lower-enqueued-block opt pass. The module is
; run with -data-layout=A5, matching the addrspace(5) allocas used below, and
; the printed IR is verified with FileCheck.
; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s

; One runtime-handle global is expected for each kernel marked "enqueued-block".
; CHECK: @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__test_block_invoke_2_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*

; Types used by the @__enqueue_kernel_basic call sites.
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; @non_caller contains no enqueue call. Its printed definition is expected to
; carry no attribute group: the metadata attachments must directly follow
; local_unnamed_addr.
; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  ret void
}

; @caller_indirect only reaches the enqueue calls through its call to @caller.
; It is still expected to receive an attribute group, which is captured here
; as AT_CALLER and matched again on @caller and in the attribute checks at the
; end of the file.
; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
  ret void
}

; @caller builds two block literals on the stack and passes them to
; @__enqueue_kernel_basic: the first literal is enqueued twice with
; @__test_block_invoke_kernel, the second once with
; @__test_block_invoke_2_kernel. It must end up in the same attribute group as
; @caller_indirect.
; NOTE(review): the call sites reference attribute group #2, which has no
; definition in the visible part of this file - confirm that is intentional.
; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  %block = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
  %tmp = alloca %struct.ndrange_t, align 4, addrspace(5)
  %block2 = alloca <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8, addrspace(5)
  %tmp3 = alloca %struct.ndrange_t, align 4, addrspace(5)

  ; First block literal: size and align fields, then the captured %a and %b.
  ; NOTE(review): assuming 8-byte pointers the packed fields add up to 17
  ; bytes, not the stored 25 - presumably a leftover from an older literal
  ; layout; the checks in this file do not look at the value.
  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 0
  store i32 25, i32 addrspace(5)* %block.size, align 8
  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align, align 4
  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured, align 8
  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 3
  store i8 %b, i8 addrspace(5)* %block.captured1, align 8
  %tmp1 = bitcast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block to void () addrspace(5)*
  %tmp4 = addrspacecast void () addrspace(5)* %tmp1 to i8*

  ; The same invoke kernel and literal are enqueued twice on purpose, so one
  ; kernel is referenced from more than one call site.
  %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
  %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2

  ; Second block literal: size and align fields, then the captured %a, %c, %d
  ; and %b (stored into fields 2, 3, 4 and 5 respectively).
  ; NOTE(review): same observation as above - the fields add up to 33 bytes
  ; with 8-byte pointers, while 41 is stored.
  %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0
  store i32 41, i32 addrspace(5)* %block.size4, align 8
  %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align5, align 4
  %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured7, align 8
  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 5
  store i8 %b, i8 addrspace(5)* %block.captured8, align 8
  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 3
  store i64 addrspace(1)* %c, i64 addrspace(1)* addrspace(5)* %block.captured9, align 8
  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 4
  store i64 %d, i64 addrspace(5)* %block.captured10, align 8
  %tmp6 = bitcast <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2 to void () addrspace(5)*
  %tmp8 = addrspacecast void () addrspace(5)* %tmp6 to i8*
  %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp3,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*), i8* nonnull %tmp8) #2
  ret void
}

; Invoke kernel for the first block literal: stores the captured i8 (field 3)
; through the captured global pointer (field 2). The input is internal; the
; printed definition is expected to be dso_local without the internal linkage
; and to carry an attribute group, captured as AT1.
; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 3
  store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  ret void
}

; Enqueue entry point called from @caller. At those call sites the two trailing
; i8* operands receive the bitcast invoke kernel and the block literal pointer.
declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr

; Invoke kernel for the second block literal: stores the captured i8 (field 5)
; through the captured i8 pointer (field 2) and the captured i64 (field 4)
; through the captured i64 pointer (field 3). As with the first invoke kernel,
; the printed definition is expected to be dso_local and to carry an attribute
; group, captured as AT2.
; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*,
    i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
    !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3
  %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4
  %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5
  store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8
  ret void
}

; Expected output attribute groups: the group shared by @caller and
; @caller_indirect holds exactly "calls-enqueue-kernel", and each invoke
; kernel's group names its own runtime-handle global.
; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"

; Input attribute group attached to both invoke kernels.
attributes #0 = { "enqueued-block" }

; !3-!6: kernel argument metadata for the four-argument kernels (a, b, c, d).
!3 = !{i32 1, i32 0, i32 1, i32 0}
!4 = !{!"none", !"none", !"none", !"none"}
!5 = !{!"char*", !"char", !"long*", !"long"}
!6 = !{!"", !"", !"", !""}
; !14-!17: kernel argument metadata for the single block-literal argument of
; the invoke kernels.
!14 = !{i32 0}
!15 = !{!"none"}
!16 = !{!"__block_literal"}
!17 = !{!""}