AMDGPU/SI: Select mad patterns to v_mac_f32

The two-address instruction pass will convert these back to v_mad_f32
if necessary.

Differential Revision: http://reviews.llvm.org/D11060

llvm-svn: 242038
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.ll
index ae84d84..600f0cb 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.ll
@@ -6,7 +6,7 @@
 declare float @llvm.fabs.f32(float) nounwind readnone
 
 ; CHECK-LABEL: {{^}}fmuladd_f32:
-; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                          float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -34,8 +34,8 @@
 ; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -53,8 +53,8 @@
 ; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -72,8 +72,8 @@
 ; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fadd_a_a_b_f32(float addrspace(1)* %out,
                             float addrspace(1)* %in1,
                             float addrspace(1)* %in2) {
@@ -94,8 +94,8 @@
 ; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fadd_b_a_a_f32(float addrspace(1)* %out,
                             float addrspace(1)* %in1,
                             float addrspace(1)* %in2) {
@@ -116,8 +116,8 @@
 ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -136,8 +136,8 @@
 ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -158,8 +158,8 @@
 ; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
 define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
index 4e4c2ec..a64dd0e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
@@ -5,7 +5,7 @@
 
 ; FUNC-LABEL: {{^}}test_lrp:
 ; SI: v_sub_f32
-; SI: v_mad_f32
+; SI: v_mac_f32_e32
 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
   %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
   store float %mad, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index bc07162..c98f851 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -19,7 +19,7 @@
 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
 
 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
 
@@ -29,7 +29,8 @@
 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
 
-; SI: buffer_store_dword [[RESULT]]
+; SI-DENORM: buffer_store_dword [[RESULT]]
+; SI-STD: buffer_store_dword [[C]]
 define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
   %tid = tail call i32 @llvm.r600.read.tidig.x() #0
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -54,8 +55,8 @@
 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
 
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
+; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]
 
 ; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
 ; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
@@ -64,8 +65,10 @@
 ; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
 ; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
 
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
 ; SI: s_endpgm
 define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
   %tid = tail call i32 @llvm.r600.read.tidig.x() #0
@@ -96,13 +99,14 @@
 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
 
 ; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
 
-; SI: buffer_store_dword [[RESULT]]
+; SI-DENORM: buffer_store_dword [[RESULT]]
+; SI-STD: buffer_store_dword [[C]]
 define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
   %tid = tail call i32 @llvm.r600.read.tidig.x() #0
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -482,7 +486,7 @@
 ; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 
 ; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
+; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]
 
 ; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
@@ -492,7 +496,8 @@
 ; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
 ; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
 
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI: s_endpgm
 define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
   %tid = tail call i32 @llvm.r600.read.tidig.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/mad-sub.ll b/llvm/test/CodeGen/AMDGPU/mad-sub.ll
index aa4194f..24ff23a 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-sub.ll
@@ -123,7 +123,7 @@
 }
 
 ; FUNC-LABEL: {{^}}neg_neg_mad_f32:
-; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_mac_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.r600.read.tidig.x() #0
   %tid.ext = sext i32 %tid to i64
@@ -172,8 +172,8 @@
 ; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
 ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; SI: buffer_store_dword [[RESULT]]
+; SI: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; SI: buffer_store_dword [[R2]]
 define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 933bb01..2e90cf1 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -9,7 +9,7 @@
 ; GCN-LABEL: {{^}}madak_f32:
 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -34,8 +34,8 @@
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VC]], [[VK]]
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VB]], [[VA]], [[VK]]
+; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]]
 ; GCN: s_endpgm
 define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -105,7 +105,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
 ; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]]
+; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -124,7 +124,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
 ; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]]
+; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -140,7 +140,7 @@
 
 ; GCN-LABEL: {{^}}s_s_madak_f32:
 ; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index ba7bb22..f8e14e3 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -28,8 +28,8 @@
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]]
+; GCN-DAG: v_mac_f32_e32 [[VB]], [[VK]], [[VA]]
+; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]]
 ; GCN: s_endpgm
 define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -59,7 +59,7 @@
 ; GCN-LABEL: {{^}}madmk_inline_imm_f32:
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
+; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
 define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -77,7 +77,7 @@
 
 ; GCN-LABEL: {{^}}s_s_madmk_f32:
 ; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
+; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
 define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -107,7 +107,7 @@
 
 ; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
 ; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
+; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
 define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
new file mode 100644
index 0000000..a4eaec3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -0,0 +1,155 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}mac_vvv:
+; GCN: buffer_load_dword [[A:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; GCN: buffer_load_dword [[B:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:4
+; GCN: buffer_load_dword [[C:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; GCN: buffer_store_dword [[C]]
+define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
+  %tmp0 = fmul float %a, %b
+  %tmp1 = fadd float %tmp0, %c
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]}}, 0.5, s{{[0-9]+}}, 0.5
+define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) {
+entry:
+  %tmp0 = fmul float 0.5, %in
+  %tmp1 = fadd float %tmp0, 0.5
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_vvs:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
+define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+
+  %tmp0 = fmul float %a, %b
+  %tmp1 = fadd float %tmp0, %c
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mac_ssv:
+; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) {
+entry:
+  %c = load float, float addrspace(1)* %in
+
+  %tmp0 = fmul float %a, %a
+  %tmp1 = fadd float %tmp0, %c
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mac_mad_same_add:
+; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
+; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
+define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %d_ptr = getelementptr float, float addrspace(1)* %in, i32 3
+  %e_ptr = getelementptr float, float addrspace(1)* %in, i32 4
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+  %d = load float, float addrspace(1)* %d_ptr
+  %e = load float, float addrspace(1)* %e_ptr
+
+  %tmp0 = fmul float %a, %b
+  %tmp1 = fadd float %tmp0, %c
+
+  %tmp2 = fmul float %d, %e
+  %tmp3 = fadd float %tmp2, %c
+
+  %out1 = getelementptr float, float addrspace(1)* %out, i32 1
+  store float %tmp1, float addrspace(1)* %out
+  store float %tmp3, float addrspace(1)* %out1
+  ret void
+}
+
+; There is no advantage to using v_mac when one of the operands is negated
+; and v_mad accepts more operand types.
+
+; GCN-LABEL: {{^}}mad_neg_src0:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @mad_neg_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
+  %neg_a = fsub float 0.0, %a
+  %tmp0 = fmul float %neg_a, %b
+  %tmp1 = fadd float %tmp0, %c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_neg_src1:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @mad_neg_src1(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
+  %neg_b = fsub float 0.0, %b
+  %tmp0 = fmul float %a, %neg_b
+  %tmp1 = fadd float %tmp0, %c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_neg_src2:
+; GCN-NOT: v_mac
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
+define void @mad_neg_src2(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
+  %neg_c = fsub float 0.0, %c
+  %tmp0 = fmul float %a, %b
+  %tmp1 = fadd float %tmp0, %neg_c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { "true" "unsafe-fp-math"="true" }