R600/SI: Allow commuting some 3-op instructions

e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c

This simplifies matching v_madmk_f32.
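
As a rough illustration of how the commute helps the v_madmk_f32 match
(illustrative only, not part of this patch, and the v_madmk_f32 operand
order is an assumption here): v_madmk_f32 computes dst = src0 * K + src1
with K taken from a 32-bit literal, something a plain VOP3 v_mad_f32
cannot encode. With src0 and src1 commutable, a constant multiplicand can
be moved to whichever source position the fold wants, e.g. (schematically):

  mad         dst, 10.0, a, b        ; constant multiplicand in src0
  mad         dst, a, 10.0, b        ; after commuting src0 and src1
  v_madmk_f32 dst, a, 0x41200000, b  ; literal K = 10.0 folded in
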
This may look surprising, but it appears to be safe: src0 and src1
can be commuted in all of these instructions, and that is all that
matters for this fold.
llvm-svn: 221910
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index 4de1f75..59d6248 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -5,6 +5,7 @@
; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}test_umad24:
; SI: v_mad_u32_u24
@@ -17,3 +18,21 @@
ret void
}
+; FUNC-LABEL: {{^}}commute_umad24:
+; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %src0.gep = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+
+ %src0 = load i32 addrspace(1)* %src0.gep, align 4
+ %src2 = load i32 addrspace(1)* %src2.gep, align 4
+ %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
+