; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s

; Tail-duplication test for the NVPTX backend.  llc is run with a large
; tail-dup-size and with tail merging disabled; the functions below share one
; CFG shape and differ only in what their common tail block calls.
target triple = "nvptx64-nvidia-cuda"

; Ordinary external function, called from the tail block of @call_foo.
declare void @foo()
; Barrier intrinsic, called from the tail block of @call_syncthreads; the
; checks on that function look for it as `bar.sync` in the output.
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated: the barrier sits in a tail block (L42)
; reached from both L1 and L2, and it must still appear exactly once in the
; emitted function.
; CHECK: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it starts its own unnamed block;
  ; nothing in this function branches to it.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Common tail of L1 and L2 -- the candidate for tail duplication.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Check that the CFG used in call_syncthreads really does trigger tail
; duplication when the shared tail block holds an ordinary call to @foo
; instead of the barrier: the call must then be emitted twice.
;
; The full `call.uni` mnemonic is matched rather than a bare "call", because
; the function's parameter symbols (call_foo_param_N) also contain "call" and
; would satisfy both checks even if nothing had been duplicated.
; CHECK: .func call_foo
; CHECK: call.uni
; CHECK: call.uni
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it starts its own unnamed block;
  ; nothing in this function branches to it.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Common tail of L1 and L2 -- expected to be copied into both predecessors.
  call void @foo()
  br label %Ret
}