[AMDGPU] simplify add x, *ext (setcc) => addc|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962

commit: e3eb42cef64ffee02fe00d633eeb0c44e24217e9 [log] [tgz]
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Wed Jun 21 22:05:06 2017 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Wed Jun 21 22:05:06 2017 +0000
tree: ab5530f576af2daa86d42c14c7d8c14ad496e2ab
parent: 1b587358be643836c00a96130b1e3b4f7f3cad06 [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
new file mode 100644
index 0000000..06cc7fc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll

@@ -0,0 +1,43 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}add1:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+
+define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %v, %ext
+  store i32 %add, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}sub1:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, 0, [[CC]]
+; GCN-NOT: v_cndmask
+
+define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %add = add i32 %v, %ext
+  store i32 %add, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+attributes #0 = { nounwind readnone speculatable }
commit	e3eb42cef64ffee02fe00d633eeb0c44e24217e9	[log] [tgz]
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Wed Jun 21 22:05:06 2017 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Wed Jun 21 22:05:06 2017 +0000
tree	ab5530f576af2daa86d42c14c7d8c14ad496e2ab
parent	1b587358be643836c00a96130b1e3b4f7f3cad06 [diff] [blame]