Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s |
| 2 | |
| 3 | ; CHECK-LABEL: %bb11 |
| 4 | |
| 5 | ; The load from %arg in the loop body has an aliasing store in the loop. |
| 6 | |
| 7 | ; CHECK: flat_load_dword |
| 8 | |
| 9 | ; CHECK-LABEL: %bb20 |
| 10 | ; CHECK: flat_store_dword |
| 11 | |
| 12 | ; ##################################################################### |
| 13 | |
| 14 | ; CHECK-LABEL: %bb22 |
| 15 | |
| 16 | ; The load from %arg has an aliasing store in the loop. |
| 17 | |
| 18 | ; CHECK: flat_load_dword |
| 19 | |
| 20 | ; ##################################################################### |
| 21 | |
| 22 | ; The load from %arg1 has a non-aliasing store in the loop: arg1[i+1] never aliases arg1[i]. |
| 23 | |
| 24 | ; CHECK: s_load_dword |
| 25 | |
; Kernel @cfg: loads a loop bound (%tmp4) from arg[arg2] and, when it is
; positive, runs a loop (%bb11 -> optional %bb20 -> %bb22) that loads from
; both %arg and %arg1 and conditionally stores to %arg1. The accumulated sum
; (or 0 when the loop is skipped) is stored to arg1[arg2] in %bb8.
; %arg is marked readonly, but neither pointer argument is marked noalias.
| 26 | define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { |
| 27 | bb: |
; Entry: load the loop bound from arg[arg2]; skip the loop unless it is > 0.
| 28 | %tmp = sext i32 %arg2 to i64 |
| 29 | %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp |
| 30 | %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0 |
| 31 | %tmp5 = icmp sgt i32 %tmp4, 0 |
| 32 | br i1 %tmp5, label %bb6, label %bb8 |
| 33 | |
; Block entered only from the entry block; it just forwards to the loop header.
| 34 | bb6: ; preds = %bb |
| 35 | br label %bb11 |
| 36 | |
; Loop exit: forwards to the common return block.
| 37 | bb7: ; preds = %bb22 |
| 38 | br label %bb8 |
| 39 | |
; Return block: store the loop result (0 if the loop was skipped) to arg1[arg2].
| 40 | bb8: ; preds = %bb7, %bb |
| 41 | %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ] |
| 42 | %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp |
| 43 | store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0 |
| 44 | ret void |
| 45 | |
; Loop header: %tmp12 is the running sum and %tmp13 the induction variable i.
; Loads arg[i srem arg2] and branches to %bb20 when the value exceeds 100.
| 46 | bb11: ; preds = %bb22, %bb6 |
| 47 | %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ] |
| 48 | %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ] |
| 49 | %tmp14 = srem i32 %tmp13, %arg2 |
| 50 | %tmp15 = sext i32 %tmp14 to i64 |
| 51 | %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15 |
| 52 | %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0 |
| 53 | %tmp18 = icmp sgt i32 %tmp17, 100 |
| 54 | %tmp19 = sext i32 %tmp13 to i64 |
| 55 | br i1 %tmp18, label %bb20, label %bb22 |
| 56 | |
; Conditional path: store 0 to arg1[i], then rejoin at %bb22.
| 57 | bb20: ; preds = %bb11 |
| 58 | %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19 |
| 59 | store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0 |
| 60 | br label %bb22 |
| 61 | |
; Loop latch: load arg[i] and arg1[i+1], add both to the running sum, and
; leave the loop once i+1 equals the bound loaded in the entry block.
| 62 | bb22: ; preds = %bb20, %bb11 |
| 63 | %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19 |
| 64 | %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0 |
| 65 | %tmp25 = add nuw nsw i32 %tmp13, 1 |
| 66 | %tmp26 = sext i32 %tmp25 to i64 |
| 67 | %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26 |
| 68 | %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0 |
| 69 | %tmp29 = add i32 %tmp24, %tmp12 |
| 70 | %tmp30 = add i32 %tmp29, %tmp28 |
| 71 | %tmp31 = icmp eq i32 %tmp25, %tmp4 |
| 72 | br i1 %tmp31, label %bb7, label %bb11 |
| 73 | } |
| 74 | |
Alexander Timofeev | 0f9c84c | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 75 | ; One more test to ensure that an aliasing store after the load |
| 76 | ; is considered clobbering if the load's parent block is the same |
| 77 | ; as the loop header block. |
| 78 | |
| 79 | ; CHECK-LABEL: %bb1 |
| 80 | |
| 81 | ; The load from %arg has an aliasing store that comes after the load |
| 82 | ; but is still considered clobbering because of the loop. |
| 83 | |
| 84 | ; CHECK: flat_load_dword |
| 85 | |
; Kernel @cfg_selfloop: a single-block loop (%bb1 branches back to itself)
; that stores arg[i srem arg2] to arg1[i] for i = 0..99.
; %arg is marked readonly, but neither pointer argument is marked noalias.
| 86 | define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { |
| 87 | bb: |
| 88 | br label %bb1 |
| 89 | |
; Loop exit: nothing left to do.
| 90 | bb2: |
| 91 | ret void |
| 92 | |
; Loop body, which is also the loop header and latch. The load from %arg
; comes before the store to %arg1 within this block; the back edge to %bb1
; means the store of one iteration precedes the load of the next.
| 93 | bb1: |
| 94 | %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ] |
| 95 | %tmp14 = srem i32 %tmp13, %arg2 |
| 96 | %tmp15 = sext i32 %tmp14 to i64 |
| 97 | %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15 |
| 98 | %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0 |
| 99 | %tmp19 = sext i32 %tmp13 to i64 |
| 100 | %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19 |
| 101 | store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0 |
| 102 | %tmp25 = add nuw nsw i32 %tmp13, 1 |
; Exit after the iteration with i = 99 (i+1 == 100).
| 103 | %tmp31 = icmp eq i32 %tmp25, 100 |
| 104 | br i1 %tmp31, label %bb2, label %bb1 |
| 105 | } |
| 106 | |
| 107 | |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 108 | attributes #0 = { "target-cpu"="fiji" } |
| 109 | |
| 110 | !0 = !{!1, !1, i64 0} |
| 111 | !1 = !{!"int", !2, i64 0} |
| 112 | !2 = !{!"omnipotent char", !3, i64 0} |
| 113 | !3 = !{!"Simple C/C++ TBAA"} |