blob: be6e3fd05ae7317859c8deb2b33dc938e0c3db99 [file] [log] [blame]
Alexander Timofeev18009562016-12-08 17:28:47 +00001; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s
2
3; CHECK-LABEL: %bb11
4
5; Load from %arg in a Loop body has alias store
6
7; CHECK: flat_load_dword
8
9; CHECK-LABEL: %bb20
10; CHECK: flat_store_dword
11
12; #####################################################################
13
14; CHECK-LABEL: %bb22
15
16; Load from %arg has alias store in Loop
17
18; CHECK: flat_load_dword
19
20; #####################################################################
21
22; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
23
24; CHECK: s_load_dword
; Kernel @cfg: a loop that conditionally stores to %arg1 while loading from
; both %arg and %arg1.  Neither pointer argument is marked noalias.
;   %arg  - i32 addrspace(1) pointer, only loaded from (readonly)
;   %arg1 - i32 addrspace(1) pointer, both loaded from and stored to
;   %arg2 - i32 used as an element index (entry/exit) and as the srem divisor
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  ; %tmp4 = arg[arg2]; it also serves as the loop exit bound in %bb22.
  %tmp = sext i32 %arg2 to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp
  %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0
  ; Skip the loop entirely unless the loaded bound is positive.
  %tmp5 = icmp sgt i32 %tmp4, 0
  br i1 %tmp5, label %bb6, label %bb8

bb6:                                              ; preds = %bb
  br label %bb11

bb7:                                              ; preds = %bb22
  br label %bb8

bb8:                                              ; preds = %bb7, %bb
  ; Result is 0 when the loop was skipped, otherwise the final accumulator.
  %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
  ; arg1[arg2] = result
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp
  store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0
  ret void

bb11:                                             ; preds = %bb22, %bb6
  ; Loop header: %tmp12 is the running sum, %tmp13 the induction variable i.
  %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
  %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
  ; Load arg[i srem arg2]; the loop also stores through %arg1 (in %bb20).
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  ; Only values greater than 100 trigger the store in %bb20.
  %tmp18 = icmp sgt i32 %tmp17, 100
  %tmp19 = sext i32 %tmp13 to i64
  br i1 %tmp18, label %bb20, label %bb22

bb20:                                             ; preds = %bb11
  ; arg1[i] = 0
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  br label %bb22

bb22:                                             ; preds = %bb20, %bb11
  ; Load arg[i].
  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19
  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0
  ; Load arg1[i+1]; the store in %bb20 writes arg1[i], a different element.
  %tmp25 = add nuw nsw i32 %tmp13, 1
  %tmp26 = sext i32 %tmp25 to i64
  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26
  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0
  ; sum += arg[i] + arg1[i+1]
  %tmp29 = add i32 %tmp24, %tmp12
  %tmp30 = add i32 %tmp29, %tmp28
  ; Leave the loop once i+1 equals the bound loaded in the entry block.
  %tmp31 = icmp eq i32 %tmp25, %tmp4
  br i1 %tmp31, label %bb7, label %bb11
}
74
Alexander Timofeev0f9c84c2017-06-15 19:33:10 +000075; one more test to ensure that aliasing store after the load
76; is considered clobbering if load parent block is the same
77; as a loop header block.
78
79; CHECK-LABEL: %bb1
80
81; Load from %arg has alias store that is after the load
82; but is considered clobbering because of the loop.
83
84; CHECK: flat_load_dword
85
; Kernel @cfg_selfloop: a single-block loop (%bb1 branches back to itself)
; that loads from %arg and then stores the loaded value through %arg1.
; Neither pointer argument is marked noalias.
;   %arg  - i32 addrspace(1) pointer, only loaded from (readonly)
;   %arg1 - i32 addrspace(1) pointer, only stored to
;   %arg2 - i32 divisor for the srem that forms the load index
define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  br label %bb1

bb2:
  ret void

bb1:
  ; %tmp13 is the induction variable i, starting at 0.
  %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
  ; Load arg[i srem arg2].
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  ; arg1[i] = loaded value.  The store follows the load within the block,
  ; but the back edge means it also runs before the next iteration's load.
  %tmp19 = sext i32 %tmp13 to i64
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  ; Fixed trip count: exit to %bb2 once i+1 reaches 100.
  %tmp25 = add nuw nsw i32 %tmp13, 1
  %tmp31 = icmp eq i32 %tmp25, 100
  br i1 %tmp31, label %bb2, label %bb1
}
; Attribute group #0, referenced by the kernel definitions above.
attributes #0 = { "target-cpu"="fiji" }

; TBAA metadata.  !0 is the access tag attached to the loads and stores above;
; it describes an "int" access at offset 0, rooted at "Simple C/C++ TBAA".
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}