; RUN: llc -march=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

; CHECK-LABEL: @switch_unreachable_default

; Kernel under test. It switches on the work-item id with an unreachable
; default destination, then runs a single-block loop that reads bytes starting
; at %in0 or %in1 until it loads a zero byte. The number of non-zero bytes that
; preceded the zero is stored to %out[%tid].
; NOTE(review): this definition uses attribute group #0 ({ nounwind readnone })
; although the body loads and stores memory - confirm that this is intended.
define amdgpu_kernel void @switch_unreachable_default(i32 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1) #0 {
centry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Only the values 0 and 1 have real successors; every other value is routed
  ; to the unreachable default block.
  switch i32 %tid, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
  ]

sw.bb0:
  br label %sw.epilog

sw.bb1:
  br label %sw.epilog

sw.default:
  unreachable

sw.epilog:
  ; Selects the input buffer according to which switch case was taken.
  %ptr = phi i8 addrspace(1)* [%in0, %sw.bb0], [%in1, %sw.bb1]
  %gep_in = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 0
  br label %sw.while

; The loop below is necessary to preserve the effect of the
; unreachable default on divergence analysis in the presence of other
; optimizations. The loop consists of a single block where the loop
; exit is divergent because it depends on the divergent phi at the
; start of the block. The checks below ensure that the loop exit is
; handled correctly as divergent. But the data-flow within the block
; is sensitive to optimizations; so we just ensure that the relevant
; operations in the block body are indeed in the same block.

; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64
; CHECK-NOT: {{ br }}
; CHECK: load i8
; CHECK-NOT: {{ br }}
; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break(i1 [[ICMP]], i64 [[PHI]])
; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop(i64 [[IF]])
; CHECK: br i1 [[LOOP]]

sw.while:
  ; %p walks the selected buffer one byte per iteration; %count is the number
  ; of iterations completed before the current one.
  %p = phi i8 addrspace(1)* [ %gep_in, %sw.epilog ], [ %incdec.ptr, %sw.while ]
  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
  %char = load i8, i8 addrspace(1)* %p, align 1
  %tobool = icmp eq i8 %char, 0
  %incdec.ptr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 1
  %count.inc = add i32 %count, 1
  ; Leave the loop once a zero byte has been loaded.
  br i1 %tobool, label %sw.exit, label %sw.while

sw.exit:
  ; Store the pre-increment count, i.e. the value of %count on the exiting
  ; iteration, into the slot indexed by the work-item id.
  %tid64 = zext i32 %tid to i64
  %gep_out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid64
  store i32 %count, i32 addrspace(1)* %gep_out, align 4
  ret void
}

; AMDGPU intrinsic called at the top of @switch_unreachable_default; its result
; is the value the kernel switches on and uses to index %out.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group shared by the kernel definition and the intrinsic declaration.
attributes #0 = { nounwind readnone }
; NOTE(review): #1 is not referenced anywhere in the visible file - confirm
; whether it is still needed.
attributes #1 = { convergent noinline optnone }