R600/SI: Combine min3/max3 instructions
llvm-svn: 222032
diff --git a/llvm/test/CodeGen/R600/fmax3.ll b/llvm/test/CodeGen/R600/fmax3.ll
new file mode 100644
index 0000000..cf371b3
--- /dev/null
+++ b/llvm/test/CodeGen/R600/fmax3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmax
+; SI-LABEL: {{^}}test_fmax3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/fmin3.ll b/llvm/test/CodeGen/R600/fmin3.ll
new file mode 100644
index 0000000..7420368
--- /dev/null
+++ b/llvm/test/CodeGen/R600/fmin3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmin3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmin
+; SI-LABEL: {{^}}test_fmin3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/max3.ll b/llvm/test/CodeGen/R600/max3.ll
new file mode 100644
index 0000000..74b08f6
--- /dev/null
+++ b/llvm/test/CodeGen/R600/max3.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax3_sgt_i32
+; SI: v_max3_i32
+define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp sgt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp sgt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umax3_ugt_i32
+; SI: v_max3_u32
+define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ugt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ugt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/min3.ll b/llvm/test/CodeGen/R600/min3.ll
new file mode 100644
index 0000000..f852cff
--- /dev/null
+++ b/llvm/test/CodeGen/R600/min3.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin3_slt_i32
+; SI: v_min3_i32
+define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp slt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_ult_i32
+; SI: v_min3_u32
+define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ult i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ult i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_umin_umin
+; SI: v_min_i32
+; SI: v_min3_i32
+define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %i1
+ %i2 = select i1 %icmp2, i32 %i0, i32 %i1
+
+ store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_2_uses
+; SI-NOT: v_min3
+define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %c
+ %i2 = select i1 %icmp2, i32 %i0, i32 %c
+
+ store i32 %i2, i32 addrspace(1)* %outgep0, align 4
+ store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}