; RUN: llc -march=amdgcn < %s | FileCheck %s
; RUN: llc -O0 -march=amdgcn < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s

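; The -amdgpu-lower-intrinsics run annotates the workitem id intrinsics with
; !range metadata derived from the known work-group size, so the zero-extending
; masks below fold away and no and_b32 is emitted in the ISA.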
; CHECK-NOT: and_b32

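; Attribute #0 bounds the flat work-group size to at most 128, so every
; workitem id is in [0, 128) and the 'and ..., 127' masks are redundant.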
; OPT-LABEL: @zext_grp_size_128
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 127
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 127
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 127
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

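; !reqd_work_group_size !0 pins the group size to 32x4x1, giving per-dimension
; bounds: x in [0, 32), y in [0, 4), z in [0, 1). Each mask below is a no-op.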
; OPT-LABEL: @zext_grp_size_32x4x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 31
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 1
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

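; With a required work-group size of 1x1x1 the id is always zero.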
; When EarlyCSE is not run (e.g. the -O0 RUN line) this call produces a range
; maximum with 0 active bits, which is a special case because an AssertZext
; from width 0 is invalid.
; OPT-LABEL: @zext_grp_size_1x1x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !4
define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !1 {
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 1
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ret void
}

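; Attribute #1 requires a flat work-group size of exactly 512, so each id is in
; [0, 512) and the 16-bit masks still fold away.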
; OPT-LABEL: @zext_grp_size_512
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !6
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 65535
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 65535
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 65535
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

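; The flat work-group size attribute (#0) also bounds the ids in a non-kernel
; function, so the call is annotated with the [0, 128) range.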
; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

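; With no flat work-group size attribute (#4) the default upper bound of 1024
; applies, so the id still gets a range and the 'and ..., 1023' folds.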
; OPT-LABEL: @func_test_workitem_id_x_default_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !7
define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #2

declare i32 @llvm.amdgcn.workitem.id.y() #2

declare i32 @llvm.amdgcn.workitem.id.z() #2

attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }
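; #0 allows flat work-group sizes from 64 to 128, #1 requires exactly 512, and
; #4 places no explicit bound (default maximum of 1024).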

!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}

; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 1, i32 1, i32 1}
; OPT: !6 = !{i32 0, i32 512}
; OPT: !7 = !{i32 0, i32 1024}