[RISCV] Add support for the various RISC-V FMA instruction variants

Adds support for the various RISC-V FMA instructions (fmadd, fmsub, fnmsub, fnmadd).

Whether a fused add or a fused subtract is used, and whether the product is
negated, is determined by which arguments to the llvm.fma.* intrinsic are
negated. In the tests, extraneous fadd instructions were added to keep the
negation from being performed with an xor trick, which prevented the proper
FMA forms from being selected and thus tested.

The FMA instruction patterns might seem incorrect (e.g., fnmadd: -rs1 * rs2 -
rs3), but they should be correct. The misleading names were inherited from
MIPS, where the negation happens after computing the sum.

The llvm.fmuladd.* intrinsics still do not generate RISC-V FMA instructions,
as that depends on TargetLowering::isFMAFasterThanFMulAndFAdd.

Some comments in the test files describing which kinds of instructions are
tested there were updated to better reflect the current content of those
files.

Differential Revision: https://reviews.llvm.org/D54205
Patch by Luís Marques.

llvm-svn: 349023
diff --git a/llvm/test/CodeGen/RISCV/alu32.ll b/llvm/test/CodeGen/RISCV/alu32.ll
index 6ee6ed7..3776e53 100644
--- a/llvm/test/CodeGen/RISCV/alu32.ll
+++ b/llvm/test/CodeGen/RISCV/alu32.ll
@@ -4,9 +4,9 @@
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I
 
-; These tests are each targeted at a particular RISC-V ALU instruction. Other
-; files in this folder exercise LLVM IR instructions that don't directly match a
-; RISC-V instruction
+; These tests are each targeted at a particular RISC-V ALU instruction. Most
+; other files in this folder exercise LLVM IR instructions that don't directly
+; match a RISC-V instruction.
 
 ; Register-immediate instructions.
 
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index a9bdf68..cd3a1d9 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -2,6 +2,10 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32IFD %s
 
+; These tests are each targeted at a particular RISC-V FPU instruction. Most
+; other files in this folder exercise LLVM IR instructions that don't directly
+; match a RISC-V instruction.
+
 define double @fadd_d(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: fadd_d:
 ; RV32IFD:       # %bb.0:
@@ -277,3 +281,118 @@
   %2 = zext i1 %1 to i32
   ret i32 %2
 }
+
+declare double @llvm.fma.f64(double, double, double)
+
+define double @fmadd_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmadd_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    fmadd.d ft0, ft2, ft1, ft0
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+  ret double %1
+}
+
+define double @fmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fmsub_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IFD-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV32IFD-NEXT:    fld ft3, 0(a0)
+; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV32IFD-NEXT:    fmsub.d ft0, ft1, ft0, ft2
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+  %c_ = fadd double 0.0, %c ; avoid negation using xor
+  %negc = fsub double -0.0, %c_
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %negc)
+  ret double %1
+}
+
+define double @fnmadd_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI16_0)
+; RV32IFD-NEXT:    addi a0, a0, %lo(.LCPI16_0)
+; RV32IFD-NEXT:    fld ft3, 0(a0)
+; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV32IFD-NEXT:    fadd.d ft1, ft1, ft3
+; RV32IFD-NEXT:    fnmadd.d ft0, ft1, ft0, ft2
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+  %a_ = fadd double 0.0, %a ; avoid negation using xor
+  %c_ = fadd double 0.0, %c ; avoid negation using xor
+  %nega = fsub double -0.0, %a_
+  %negc = fsub double -0.0, %c_
+  %1 = call double @llvm.fma.f64(double %nega, double %b, double %negc) ; -a * b - c, checked as fnmadd.d
+  ret double %1
+}
+
+define double @fnmsub_d(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IFD-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; RV32IFD-NEXT:    fld ft3, 0(a0)
+; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV32IFD-NEXT:    fnmsub.d ft0, ft2, ft1, ft0
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+  %a_ = fadd double 0.0, %a ; avoid negation using xor
+  %nega = fsub double -0.0, %a_
+  %1 = call double @llvm.fma.f64(double %nega, double %b, double %c) ; -a * b + c, checked as fnmsub.d
+  ret double %1
+}
diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 4a5239f..cd14c19 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -4,7 +4,7 @@
 
 declare double @llvm.sqrt.f64(double)
 
-define double @sqrt_f64(double %a) {
+define double @sqrt_f64(double %a) nounwind {
 ; RV32IFD-LABEL: sqrt_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -18,12 +18,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.sqrt.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.powi.f64(double, i32)
 
-define double @powi_f64(double %a, i32 %b) {
+define double @powi_f64(double %a, i32 %b) nounwind {
 ; RV32IFD-LABEL: powi_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -33,12 +33,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.powi.f64(double %a, i32 %b)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.sin.f64(double)
 
-define double @sin_f64(double %a) {
+define double @sin_f64(double %a) nounwind {
 ; RV32IFD-LABEL: sin_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -48,12 +48,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.sin.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.cos.f64(double)
 
-define double @cos_f64(double %a) {
+define double @cos_f64(double %a) nounwind {
 ; RV32IFD-LABEL: cos_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -63,11 +63,11 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.cos.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 ; The sin+cos combination results in an FSINCOS SelectionDAG node.
-define double @sincos_f64(double %a) {
+define double @sincos_f64(double %a) nounwind {
 ; RV32IFD-LABEL: sincos_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -32
@@ -104,12 +104,12 @@
   %1 = call double @llvm.sin.f64(double %a)
   %2 = call double @llvm.cos.f64(double %a)
   %3 = fadd double %1, %2
-	ret double %3
+  ret double %3
 }
 
 declare double @llvm.pow.f64(double, double)
 
-define double @pow_f64(double %a, double %b) {
+define double @pow_f64(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: pow_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -119,12 +119,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.pow.f64(double %a, double %b)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.exp.f64(double)
 
-define double @exp_f64(double %a) {
+define double @exp_f64(double %a) nounwind {
 ; RV32IFD-LABEL: exp_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -134,12 +134,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.exp.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.exp2.f64(double)
 
-define double @exp2_f64(double %a) {
+define double @exp2_f64(double %a) nounwind {
 ; RV32IFD-LABEL: exp2_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -149,12 +149,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.exp2.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.log.f64(double)
 
-define double @log_f64(double %a) {
+define double @log_f64(double %a) nounwind {
 ; RV32IFD-LABEL: log_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -164,12 +164,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.log.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.log10.f64(double)
 
-define double @log10_f64(double %a) {
+define double @log10_f64(double %a) nounwind {
 ; RV32IFD-LABEL: log10_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -179,12 +179,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.log10.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.log2.f64(double)
 
-define double @log2_f64(double %a) {
+define double @log2_f64(double %a) nounwind {
 ; RV32IFD-LABEL: log2_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -194,28 +194,64 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.log2.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.fma.f64(double, double, double)
 
-; TODO: Select RISC-V FMA instruction.
-define double @fma_f64(double %a, double %b, double %c) {
+define double @fma_f64(double %a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fma_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp)
-; RV32IFD-NEXT:    call fma
-; RV32IFD-NEXT:    lw ra, 12(sp)
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    fmadd.d ft0, ft2, ft1, ft0
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
-	ret double %1
+  ret double %1
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+define double @fmuladd_f64(double %a, double %b, double %c) nounwind {
+; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd
+; RV32IFD-LABEL: fmuladd_f64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    fmul.d ft0, ft1, ft0
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+  %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
+  ret double %1
 }
 
 declare double @llvm.fabs.f64(double)
 
-define double @fabs_f64(double %a) {
+define double @fabs_f64(double %a) nounwind {
 ; RV32IFD-LABEL: fabs_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -229,7 +265,7 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.fabs.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.minnum.f64(double, double)
@@ -317,7 +353,7 @@
 
 declare double @llvm.floor.f64(double)
 
-define double @floor_f64(double %a) {
+define double @floor_f64(double %a) nounwind {
 ; RV32IFD-LABEL: floor_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -327,12 +363,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.floor.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.ceil.f64(double)
 
-define double @ceil_f64(double %a) {
+define double @ceil_f64(double %a) nounwind {
 ; RV32IFD-LABEL: ceil_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -342,12 +378,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.ceil.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.trunc.f64(double)
 
-define double @trunc_f64(double %a) {
+define double @trunc_f64(double %a) nounwind {
 ; RV32IFD-LABEL: trunc_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -357,12 +393,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.trunc.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.rint.f64(double)
 
-define double @rint_f64(double %a) {
+define double @rint_f64(double %a) nounwind {
 ; RV32IFD-LABEL: rint_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -372,12 +408,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.rint.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.nearbyint.f64(double)
 
-define double @nearbyint_f64(double %a) {
+define double @nearbyint_f64(double %a) nounwind {
 ; RV32IFD-LABEL: nearbyint_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -387,12 +423,12 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.nearbyint.f64(double %a)
-	ret double %1
+  ret double %1
 }
 
 declare double @llvm.round.f64(double)
 
-define double @round_f64(double %a) {
+define double @round_f64(double %a) nounwind {
 ; RV32IFD-LABEL: round_f64:
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -16
@@ -402,5 +438,5 @@
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
   %1 = call double @llvm.round.f64(double %a)
-	ret double %1
+  ret double %1
 }
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index f3ec61b..ab87447 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -2,6 +2,10 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32IF %s
 
+; These tests are each targeted at a particular RISC-V FPU instruction. Most
+; other files in this folder exercise LLVM IR instructions that don't directly
+; match a RISC-V instruction.
+
 define float @fadd_s(float %a, float %b) nounwind {
 ; RV32IF-LABEL: fadd_s:
 ; RV32IF:       # %bb.0:
@@ -186,3 +190,78 @@
   %2 = zext i1 %1 to i32
   ret i32 %2
 }
+
+declare float @llvm.fma.f32(float, float, float)
+
+define float @fmadd_s(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fmadd_s:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    fmv.w.x ft1, a1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+define float @fmsub_s(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fmsub_s:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    lui a2, %hi(.LCPI15_0)
+; RV32IF-NEXT:    addi a2, a2, %lo(.LCPI15_0)
+; RV32IF-NEXT:    flw ft1, 0(a2)
+; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
+; RV32IF-NEXT:    fmv.w.x ft1, a1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fmsub.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+  %c_ = fadd float 0.0, %c ; avoid negation using xor
+  %negc = fsub float -0.0, %c_
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %negc)
+  ret float %1
+}
+
+define float @fnmadd_s(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_s:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    lui a2, %hi(.LCPI16_0)
+; RV32IF-NEXT:    addi a2, a2, %lo(.LCPI16_0)
+; RV32IF-NEXT:    flw ft1, 0(a2)
+; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fadd.s ft1, ft2, ft1
+; RV32IF-NEXT:    fmv.w.x ft2, a1
+; RV32IF-NEXT:    fnmadd.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+  %a_ = fadd float 0.0, %a ; avoid negation using xor
+  %c_ = fadd float 0.0, %c ; avoid negation using xor
+  %nega = fsub float -0.0, %a_
+  %negc = fsub float -0.0, %c_
+  %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc) ; -a * b - c, checked as fnmadd.s
+  ret float %1
+}
+
+define float @fnmsub_s(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmsub_s:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a0
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IF-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; RV32IF-NEXT:    flw ft1, 0(a0)
+; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
+; RV32IF-NEXT:    fmv.w.x ft1, a2
+; RV32IF-NEXT:    fmv.w.x ft2, a1
+; RV32IF-NEXT:    fnmsub.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+  %a_ = fadd float 0.0, %a ; avoid negation using xor
+  %nega = fsub float -0.0, %a_
+  %1 = call float @llvm.fma.f32(float %nega, float %b, float %c) ; -a * b + c, checked as fnmsub.s
+  ret float %1
+}
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 1da644f..57f3a28 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -6,7 +6,7 @@
 
 declare float @llvm.sqrt.f32(float)
 
-define float @sqrt_f32(float %a) {
+define float @sqrt_f32(float %a) nounwind {
 ; RV32IF-LABEL: sqrt_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    fmv.w.x ft0, a0
@@ -14,12 +14,12 @@
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.sqrt.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.powi.f32(float, i32)
 
-define float @powi_f32(float %a, i32 %b) {
+define float @powi_f32(float %a, i32 %b) nounwind {
 ; RV32IF-LABEL: powi_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -29,12 +29,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.powi.f32(float %a, i32 %b)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.sin.f32(float)
 
-define float @sin_f32(float %a) {
+define float @sin_f32(float %a) nounwind {
 ; RV32IF-LABEL: sin_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -44,12 +44,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.sin.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.cos.f32(float)
 
-define float @cos_f32(float %a) {
+define float @cos_f32(float %a) nounwind {
 ; RV32IF-LABEL: cos_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -59,11 +59,11 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.cos.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 ; The sin+cos combination results in an FSINCOS SelectionDAG node.
-define float @sincos_f32(float %a) {
+define float @sincos_f32(float %a) nounwind {
 ; RV32IF-LABEL: sincos_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -87,12 +87,12 @@
   %1 = call float @llvm.sin.f32(float %a)
   %2 = call float @llvm.cos.f32(float %a)
   %3 = fadd float %1, %2
-	ret float %3
+  ret float %3
 }
 
 declare float @llvm.pow.f32(float, float)
 
-define float @pow_f32(float %a, float %b) {
+define float @pow_f32(float %a, float %b) nounwind {
 ; RV32IF-LABEL: pow_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -102,12 +102,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.pow.f32(float %a, float %b)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.exp.f32(float)
 
-define float @exp_f32(float %a) {
+define float @exp_f32(float %a) nounwind {
 ; RV32IF-LABEL: exp_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -117,12 +117,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.exp.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.exp2.f32(float)
 
-define float @exp2_f32(float %a) {
+define float @exp2_f32(float %a) nounwind {
 ; RV32IF-LABEL: exp2_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -132,12 +132,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.exp2.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.log.f32(float)
 
-define float @log_f32(float %a) {
+define float @log_f32(float %a) nounwind {
 ; RV32IF-LABEL: log_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -147,12 +147,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.log.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.log10.f32(float)
 
-define float @log10_f32(float %a) {
+define float @log10_f32(float %a) nounwind {
 ; RV32IF-LABEL: log10_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -162,12 +162,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.log10.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.log2.f32(float)
 
-define float @log2_f32(float %a) {
+define float @log2_f32(float %a) nounwind {
 ; RV32IF-LABEL: log2_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -177,28 +177,44 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.log2.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.fma.f32(float, float, float)
 
-; TODO: Select RISC-V FMA instruction.
-define float @fma_f32(float %a, float %b, float %c) {
+define float @fma_f32(float %a, float %b, float %c) nounwind {
 ; RV32IF-LABEL: fma_f32:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    addi sp, sp, -16
-; RV32IF-NEXT:    sw ra, 12(sp)
-; RV32IF-NEXT:    call fmaf
-; RV32IF-NEXT:    lw ra, 12(sp)
-; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    fmv.w.x ft1, a1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
-	ret float %1
+  ret float %1
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+define float @fmuladd_f32(float %a, float %b, float %c) nounwind {
+; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd
+; RV32IF-LABEL: fmuladd_f32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a1
+; RV32IF-NEXT:    fmv.w.x ft1, a0
+; RV32IF-NEXT:    fmul.s ft0, ft1, ft0
+; RV32IF-NEXT:    fmv.w.x ft1, a2
+; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+  %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+  ret float %1
 }
 
 declare float @llvm.fabs.f32(float)
 
-define float @fabs_f32(float %a) {
+define float @fabs_f32(float %a) nounwind {
 ; RV32IF-LABEL: fabs_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    lui a1, 524288
@@ -206,7 +222,7 @@
 ; RV32IF-NEXT:    and a0, a0, a1
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.fabs.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.minnum.f32(float, float)
@@ -270,7 +286,7 @@
 
 declare float @llvm.floor.f32(float)
 
-define float @floor_f32(float %a) {
+define float @floor_f32(float %a) nounwind {
 ; RV32IF-LABEL: floor_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -280,12 +296,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.floor.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.ceil.f32(float)
 
-define float @ceil_f32(float %a) {
+define float @ceil_f32(float %a) nounwind {
 ; RV32IF-LABEL: ceil_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -295,12 +311,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.ceil.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.trunc.f32(float)
 
-define float @trunc_f32(float %a) {
+define float @trunc_f32(float %a) nounwind {
 ; RV32IF-LABEL: trunc_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -310,12 +326,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.trunc.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.rint.f32(float)
 
-define float @rint_f32(float %a) {
+define float @rint_f32(float %a) nounwind {
 ; RV32IF-LABEL: rint_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -325,12 +341,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.rint.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.nearbyint.f32(float)
 
-define float @nearbyint_f32(float %a) {
+define float @nearbyint_f32(float %a) nounwind {
 ; RV32IF-LABEL: nearbyint_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -340,12 +356,12 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.nearbyint.f32(float %a)
-	ret float %1
+  ret float %1
 }
 
 declare float @llvm.round.f32(float)
 
-define float @round_f32(float %a) {
+define float @round_f32(float %a) nounwind {
 ; RV32IF-LABEL: round_f32:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
@@ -355,5 +371,5 @@
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
   %1 = call float @llvm.round.f32(float %a)
-	ret float %1
+  ret float %1
 }