R600/SI: Allow commuting some 3-op instructions

e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c

This simplifies matching v_madmk_f32.
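
As a rough illustration of how the commute helps the v_madmk_f32 match
(illustrative only, not part of this patch, and the v_madmk_f32 operand
order is an assumption here): v_madmk_f32 computes dst = src0 * K + src1
with K taken from a 32-bit literal, something a plain VOP3 v_mad_f32
cannot encode. With src0 and src1 commutable, a constant multiplicand can
be moved to whichever source position the fold wants, e.g. (schematically):

  mad         dst, 10.0, a, b        ; constant multiplicand in src0
  mad         dst, a, 10.0, b        ; after commuting src0 and src1
  v_madmk_f32 dst, a, 0x41200000, b  ; literal K = 10.0 folded in
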
This may look surprising, but it appears to be safe: src0 and src1
can be commuted in all of these instructions, and that is all that
matters for this fold.
llvm-svn: 221910
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index 4de1f75..59d6248 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -5,6 +5,7 @@
; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}test_umad24:
; SI: v_mad_u32_u24
@@ -17,3 +18,21 @@
ret void
}
+; FUNC-LABEL: {{^}}commute_umad24:
+; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %src0.gep = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+
+ %src0 = load i32 addrspace(1)* %src0.gep, align 4
+ %src2 = load i32 addrspace(1)* %src2.gep, align 4
+ %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
+