; RUN: llc -march=amdgcn < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s

; The llc output for the whole file must not contain any and_b32 instruction,
; even though every kernel below masks its workitem ids with an 'and'.
; CHECK-NOT: and_b32

; Kernel using attribute group #0 (flat work-group size "64,128") and no
; reqd_work_group_size metadata. Each workitem id is masked with 127 and stored
; to three consecutive i32 slots starting at %arg. After the opt run, all three
; id calls are expected to carry the same range metadata, [0, 128).
; OPT-LABEL: @zext_grp_size_128
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !0
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %tmp1 = and i32 %tmp, 127
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
  %tmp3 = and i32 %tmp2, 127
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
  %tmp6 = and i32 %tmp5, 127
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; Kernel using attribute group #0 plus !reqd_work_group_size !0, i.e.
; {32, 4, 1}. The x, y and z ids are masked with 31, 3 and 1 respectively and
; stored to three consecutive i32 slots starting at %arg. After the opt run,
; each id call is expected to carry its own per-dimension range:
; [0, 32) for x, [0, 4) for y and [0, 1) for z.
; OPT-LABEL: @zext_grp_size_32x4x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !2
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !3
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !4
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %tmp1 = and i32 %tmp, 31
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
  %tmp6 = and i32 %tmp5, 1
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; Kernel using attribute group #1 (flat work-group size "512,512") and no
; reqd_work_group_size metadata. Each workitem id is masked with 65535 and
; stored to three consecutive i32 slots starting at %arg. After the opt run,
; all three id calls are expected to carry the same range metadata, [0, 512).
; OPT-LABEL: @zext_grp_size_512
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !5
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !5
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !5
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
  ; x id -> %arg[0]
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %tmp1 = and i32 %tmp, 65535
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ; y id -> %arg[1]
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
  %tmp3 = and i32 %tmp2, 65535
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  ; z id -> %arg[2]
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
  %tmp6 = and i32 %tmp5, 65535
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; Workitem id intrinsics called by the kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #2

declare i32 @llvm.amdgcn.workitem.id.y() #2

declare i32 @llvm.amdgcn.workitem.id.z() #2

; #0 and #1 differ only in the flat work-group size string given to the
; kernels; #2 is applied to the intrinsic declarations and their call sites.
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone }

; Input metadata: the required work-group size referenced by
; @zext_grp_size_32x4x1.
!0 = !{i32 32, i32 4, i32 1}

; Expected metadata in the opt output. The numbering differs from the input:
; the required work-group size node is expected as !1, and the remaining nodes
; are the {lower, upper} pairs referenced by the !range checks above.
; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 0, i32 512}