; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s

; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer

; Named types used by the @__enqueue_kernel_basic declaration and its call
; sites: the ndrange argument and the (opaque) queue argument.
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; @non_caller contains no calls at all. The directives below require that no
; attribute group reference (#N) shows up between this kernel's define line
; and the next labelled kernel in the pass output.
; CHECK-LABEL: define amdgpu_kernel void @non_caller
; CHECK-NOT: #{{[0-9]+}}
define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  ret void
}

; @caller_indirect does not enqueue anything itself; it only forwards its four
; arguments to @caller. The directives below expect it to carry an attribute
; group on its define line, which is captured here as AT_CALLER and reused by
; the later expectations for @caller and @inlined_caller.
; CHECK-LABEL: define amdgpu_kernel void @caller_indirect
; CHECK-SAME: #[[AT_CALLER:[0-9]+]]
define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
  ret void
}

; @caller fills in two block literals in addrspace(5) allocas and calls
; @__enqueue_kernel_basic five times, passing in order:
;   @__test_block_invoke_kernel (twice), @0, @1, @__test_block_invoke_2_kernel.
; The directives below expect, in the pass output:
;   * the same attribute group as @caller_indirect on the define line,
;   * no remaining direct reference to any of those kernels before the first
;     matched enqueue call, and
;   * each call to reference the matching "<kernel>.runtime_handle" global
;     instead, with the unnamed kernels @0/@1 appearing under the names
;     __amdgpu_enqueued_kernel and __amdgpu_enqueued_kernel.1.
; CHECK-LABEL: define amdgpu_kernel void @caller
; CHECK-SAME: #[[AT_CALLER]]
; CHECK-NOT: @__test_block_invoke_kernel
; CHECK-NOT: @__test_block_invoke_2_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel.1
; CHECK-NOT: @0
; CHECK-NOT: @1
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle
define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  ; Stack storage (addrspace(5)) for the two block literals and their ndranges.
  %block = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
  %tmp = alloca %struct.ndrange_t, align 4, addrspace(5)
  %block2 = alloca <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8, addrspace(5)
  %tmp3 = alloca %struct.ndrange_t, align 4, addrspace(5)

  ; First block literal: two i32 header fields, then the captured %a and %b.
  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 0
  store i32 25, i32 addrspace(5)* %block.size, align 8
  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align, align 4
  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured, align 8
  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 3
  store i8 %b, i8 addrspace(5)* %block.captured1, align 8
  %tmp1 = bitcast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block to void () addrspace(5)*
  %tmp4 = addrspacecast void () addrspace(5)* %tmp1 to i8*

  ; The same named kernel is enqueued twice, then each unnamed kernel once,
  ; all with the first block literal.
  %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
  %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
  %tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2
  %tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2

  ; Second block literal: same two header fields, then %a, %c, %d and %b
  ; (stored into fields 2, 3, 4 and 5 respectively).
  %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0
  store i32 41, i32 addrspace(5)* %block.size4, align 8
  %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align5, align 4
  %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured7, align 8
  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 5
  store i8 %b, i8 addrspace(5)* %block.captured8, align 8
  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 3
  store i64 addrspace(1)* %c, i64 addrspace(1)* addrspace(5)* %block.captured9, align 8
  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 4
  store i64 %d, i64 addrspace(5)* %block.captured10, align 8
  %tmp6 = bitcast <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2 to void () addrspace(5)*
  %tmp8 = addrspacecast void () addrspace(5)* %tmp6 to i8*

  ; Fifth enqueue: the second kernel with the second block literal and ndrange.
  %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp3,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*), i8* nonnull %tmp8) #2
  ret void
}

; __enqueue_kernel* functions may get inlined
; @inlined_caller therefore has no enqueue call; it only loads an i64 through
; a constant expression that casts @__test_block_invoke_kernel to a global
; pointer. The directives below expect the same caller attribute group and
; expect the load to read element 0 of the kernel's runtime_handle global
; rather than the kernel itself.
; CHECK-LABEL: define amdgpu_kernel void @inlined_caller
; CHECK-SAME: #[[AT_CALLER]]
; CHECK-NOT: @__test_block_invoke_kernel
; CHECK: load i64, i64 addrspace(1)* getelementptr inbounds ([2 x i64], [2 x i64] addrspace(1)* @__test_block_invoke_kernel.runtime_handle, i32 0, i32 0)
define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  %tmp = load i64, i64 addrspace(1)* addrspacecast (i64* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i64*) to i64 addrspace(1)*)
  store i64 %tmp, i64 addrspace(1)* %c
  ret void
}

; First enqueued kernel. It takes the block literal by value and stores the
; captured i8 (field 3) through the captured pointer (field 2).
; It is `internal` in the input; the directives below expect `dso_local`
; on the output define line plus an attribute group captured as AT1.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel
; CHECK-SAME: #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 3
  store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  ret void
}

; External enqueue entry point called from @caller. At those call sites the
; fourth argument is the bitcast kernel reference and the fifth is the block
; literal pointer.
declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr

; Second enqueued kernel, taking the larger block literal by value. It stores
; the captured i8 (field 5) through the i8 pointer (field 2) and the captured
; i64 (field 4) through the i64 pointer (field 3).
; It is `internal` in the input; the directives below expect `dso_local`
; on the output define line plus an attribute group captured as AT2.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel
; CHECK-SAME: #[[AT2:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*,
    i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
    !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3
  %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4
  %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5
  store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8
  ret void
}

; First unnamed enqueued kernel (@0) with an empty body. The directives below
; expect it to appear in the output under the name __amdgpu_enqueued_kernel,
; as `dso_local`, with an attribute group captured as AT3.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel
; CHECK-SAME: #[[AT3:[0-9]+]]
define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
  ret void
}

; Second unnamed enqueued kernel (@1) with an empty body. The directives below
; expect it to appear in the output under the name __amdgpu_enqueued_kernel.1,
; as `dso_local`, with an attribute group captured as AT4.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1
; CHECK-SAME: #[[AT4:[0-9]+]]
define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
  ret void
}

; Expected attribute groups in the output: the group shared by the calling
; kernels must be exactly { "calls-enqueue-kernel" }, and each enqueued
; kernel's group must name its own runtime_handle global.
; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle"
; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle"
; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle"

; Input attribute group carried by the four internal kernels in this file.
attributes #0 = { "enqueued-block" }

; !3-!6: kernel_arg_* metadata attached to the four-argument kernels
; (one entry per argument, in the order %a, %b, %c, %d).
!3 = !{i32 1, i32 0, i32 1, i32 0}
!4 = !{!"none", !"none", !"none", !"none"}
!5 = !{!"char*", !"char", !"long*", !"long"}
!6 = !{!"", !"", !"", !""}
; !14-!17: kernel_arg_* metadata attached to the single-argument
; enqueued-block kernels.
!14 = !{i32 0}
!15 = !{!"none"}
!16 = !{!"__block_literal"}
!17 = !{!""}