[SubZero] Implement GP to/from FP moves for MIPS

This patch implements the mtc1/mfc1 instructions, which are required for moves between GP and FP registers.
It also implements fptosi and sitofp for the float and i32 types in order to exercise the mtc1/mfc1 instructions.

R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/2316933002 .

Patch from Jaydeep Patil <jaydeep.patil@imgtec.com>.
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index c25fbe1..85cf07b 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -733,27 +733,39 @@
 
   // reg to reg
   if (DestIsReg && SrcIsReg) {
-    switch (Dest->getType()) {
-    case IceType_f32:
-      ActualOpcode = "mov.s";
-      break;
-    case IceType_f64:
-      ActualOpcode = "mov.d";
-      break;
-    case IceType_i1:
-    case IceType_i8:
-    case IceType_i16:
-    case IceType_i32:
-      Str << "\t"
-          << "move"
-          << "\t";
-      getDest()->emit(Func);
-      Str << ", ";
-      getSrc(0)->emit(Func);
-      return;
-    default:
-      UnimplementedError(getFlags());
-      return;
+    const Type DstType = Dest->getType();
+    const Type SrcType = Src->getType();
+
+    // move GP to/from FP
+    if (DstType != SrcType) {
+      if (isScalarFloatingType(DstType)) {
+        Str << "\t"
+               "mtc1"
+               "\t";
+        getSrc(0)->emit(Func);
+        Str << ", ";
+        getDest()->emit(Func);
+        return;
+      }
+      ActualOpcode = "mfc1";
+    } else {
+      switch (Dest->getType()) {
+      case IceType_f32:
+        ActualOpcode = "mov.s";
+        break;
+      case IceType_f64:
+        ActualOpcode = "mov.d";
+        break;
+      case IceType_i1:
+      case IceType_i8:
+      case IceType_i16:
+      case IceType_i32:
+        ActualOpcode = "move";
+        break;
+      default:
+        UnimplementedError(getFlags());
+        return;
+      }
     }
 
     assert(ActualOpcode);
diff --git a/src/IceRegistersMIPS32.h b/src/IceRegistersMIPS32.h
index 221356b..2880b74 100644
--- a/src/IceRegistersMIPS32.h
+++ b/src/IceRegistersMIPS32.h
@@ -72,8 +72,11 @@
 }
 
 static inline bool isGPRReg(RegNumT RegNum) {
-  return (int(Reg_GPR_First) <= int(RegNum)) &&
-         (unsigned(RegNum) <= Reg_GPR_Last);
+  bool IsGPR = ((int(Reg_GPR_First) <= int(RegNum)) &&
+                (unsigned(RegNum) <= Reg_GPR_Last)) ||
+               ((int(Reg_I64PAIR_First) <= int(RegNum)) &&
+                (unsigned(RegNum) <= Reg_I64PAIR_Last));
+  return IsGPR;
 }
 
 static inline FPRRegister getEncodedFPR(RegNumT RegNum) {
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 237eef4..e4f2fd0 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -1452,6 +1452,69 @@
     return;
 
   bool Legalized = false;
+  auto *SrcR = llvm::cast<Variable>(Src);
+  if (Dest->hasReg() && SrcR->hasReg()) {
+    // This might be a GP to/from FP move generated due to argument passing.
+    // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
+    // different types.
+    const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
+    const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
+    const RegNumT SRegNum = SrcR->getRegNum();
+    const RegNumT DRegNum = Dest->getRegNum();
+    if (IsDstGPR != IsSrcGPR) {
+      if (IsDstGPR) {
+        // Dest is GPR and SrcR is FPR. Use mfc1.
+        if (typeWidthInBytes(Dest->getType()) == 8) {
+          // Split it into two mfc1 instructions
+          Variable *SrcGPRHi = Target->makeReg(
+              IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
+          Variable *SrcGPRLo = Target->makeReg(
+              IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
+          Variable *DstFPRHi = Target->makeReg(
+              IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
+          Variable *DstFPRLo = Target->makeReg(
+              IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
+          Target->_mov(DstFPRHi, SrcGPRLo);
+          Target->_mov(DstFPRLo, SrcGPRHi);
+          Legalized = true;
+        } else {
+          Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
+          Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
+          Target->_mov(DstFPR, SrcGPR);
+          Legalized = true;
+        }
+      } else {
+        // Dest is FPR and SrcR is GPR. Use mtc1.
+        if (typeWidthInBytes(SrcR->getType()) == 8) {
+          // Split it into two mtc1 instructions
+          Variable *SrcGPRHi = Target->makeReg(
+              IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
+          Variable *SrcGPRLo = Target->makeReg(
+              IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
+          Variable *DstFPRHi = Target->makeReg(
+              IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
+          Variable *DstFPRLo = Target->makeReg(
+              IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
+          Target->_mov(DstFPRHi, SrcGPRLo);
+          Target->_mov(DstFPRLo, SrcGPRHi);
+          Legalized = true;
+        } else {
+          Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
+          Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
+          Target->_mov(DstFPR, SrcGPR);
+          Legalized = true;
+        }
+      }
+    }
+    if (Legalized) {
+      if (MovInstr->isDestRedefined()) {
+        Target->_set_dest_redefined();
+      }
+      MovInstr->setDeleted();
+      return;
+    }
+  }
+
   if (!Dest->hasReg()) {
     auto *SrcR = llvm::cast<Variable>(Src);
     assert(SrcR->hasReg());
@@ -1469,22 +1532,15 @@
     // case type of the SrcR is still FP thus we need to explicitly generate sw
     // instead of swc1.
     const RegNumT RegNum = SrcR->getRegNum();
-    const bool isSrcGPReg = ((unsigned)RegNum >= RegMIPS32::Reg_A0 &&
-                             (unsigned)RegNum <= RegMIPS32::Reg_A3) ||
-                            ((unsigned)RegNum >= RegMIPS32::Reg_A0A1 &&
-                             (unsigned)RegNum <= RegMIPS32::Reg_A2A3);
-    if (SrcTy == IceType_f32 && isSrcGPReg == true) {
+    const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
+    if (SrcTy == IceType_f32 && IsSrcGPReg == true) {
       Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
       Target->_sw(SrcGPR, Addr);
-    } else if (SrcTy == IceType_f64 && isSrcGPReg == true) {
-      Variable *SrcGPRHi, *SrcGPRLo;
-      if (RegNum == RegMIPS32::Reg_A0A1) {
-        SrcGPRLo = Target->makeReg(IceType_i32, RegMIPS32::Reg_A0);
-        SrcGPRHi = Target->makeReg(IceType_i32, RegMIPS32::Reg_A1);
-      } else {
-        SrcGPRLo = Target->makeReg(IceType_i32, RegMIPS32::Reg_A2);
-        SrcGPRHi = Target->makeReg(IceType_i32, RegMIPS32::Reg_A3);
-      }
+    } else if (SrcTy == IceType_f64 && IsSrcGPReg == true) {
+      Variable *SrcGPRHi =
+          Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
+      Variable *SrcGPRLo = Target->makeReg(
+          IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
       OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
           Target->Func, DestTy, Base,
           llvm::cast<ConstantInteger32>(
@@ -2411,15 +2467,33 @@
     _mov(Dest, DestR);
     break;
   }
-  case InstCast::Fptosi: //
-    UnimplementedLoweringError(this, Instr);
+  case InstCast::Fptosi: {
+    if (Src0Ty == IceType_f32 && DestTy == IceType_i32) {
+      Variable *Src0R = legalizeToReg(Src0);
+      Variable *FTmp = makeReg(IceType_f32);
+      _trunc_w_s(FTmp, Src0R);
+      _mov(Dest, FTmp);
+    } else {
+      UnimplementedLoweringError(this, Instr);
+    }
     break;
+  }
   case InstCast::Fptoui:
     UnimplementedLoweringError(this, Instr);
     break;
-  case InstCast::Sitofp: //
-    UnimplementedLoweringError(this, Instr);
+  case InstCast::Sitofp: {
+    if (Src0Ty == IceType_i32 && DestTy == IceType_f32) {
+      Variable *Src0R = legalizeToReg(Src0);
+      Variable *FTmp1 = makeReg(IceType_f32);
+      Variable *FTmp2 = makeReg(IceType_f32);
+      _mov(FTmp1, Src0R);
+      _cvt_s_w(FTmp2, FTmp1);
+      _mov(Dest, FTmp2);
+    } else {
+      UnimplementedLoweringError(this, Instr);
+    }
     break;
+  }
   case InstCast::Uitofp: {
     UnimplementedLoweringError(this, Instr);
     break;
diff --git a/tests_lit/llvm2ice_tests/fp.convert.ll b/tests_lit/llvm2ice_tests/fp.convert.ll
index d1bd743..9c3dabb 100644
--- a/tests_lit/llvm2ice_tests/fp.convert.ll
+++ b/tests_lit/llvm2ice_tests/fp.convert.ll
@@ -21,6 +21,11 @@
 ; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
 ; RUN:   --check-prefix=MIPS32
 
+; RUN: %if --need=allow_dump --need=target_MIPS32 --command %p2i \
+; RUN:   --filetype=asm --target mips32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
+; RUN:   --check-prefix=MIPS32O2
+
 define internal float @fptrunc(double %a) {
 entry:
   %conv = fptrunc double %a to float
@@ -33,6 +38,8 @@
 ; ARM32: vcvt.f32.f64 {{s[0-9]+}}, {{d[0-9]+}}
 ; MIPS32-LABEL: fptrunc
 ; MIPS32: cvt.s.d
+; MIPS32O2-LABEL: fptrunc
+; MIPS32O2: cvt.s.d
 
 define internal double @fpext(float %a) {
 entry:
@@ -46,6 +53,8 @@
 ; ARM32: vcvt.f64.f32 {{d[0-9]+}}, {{s[0-9]+}}
 ; MIPS32-LABEL: fpext
 ; MIPS32: cvt.d.s
+; MIPS32O2-LABEL: fpext
+; MIPS32O2: cvt.d.s
 
 define internal i64 @doubleToSigned64(double %a) {
 entry:
@@ -56,6 +65,10 @@
 ; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f64_i64
 ; ARM32-LABEL: doubleToSigned64
 ; TODO(jpp): implement this test.
+; MIPS32-LABEL: doubleToSigned64
+; MIPS32: jal __Sz_fptosi_f64_i64
+; MIPS32O2-LABEL: doubleToSigned64
+; MIPS32O2: jal __Sz_fptosi_f64_i64
 
 define internal i64 @floatToSigned64(float %a) {
 entry:
@@ -66,6 +79,10 @@
 ; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f32_i64
 ; ARM32-LABEL: floatToSigned64
 ; TODO(jpp): implement this test.
+; MIPS32-LABEL: floatToSigned64
+; MIPS32: jal __Sz_fptosi_f32_i64
+; MIPS32O2-LABEL: floatToSigned64
+; MIPS32O2: jal __Sz_fptosi_f32_i64
 
 define internal i64 @doubleToUnsigned64(double %a) {
 entry:
@@ -76,6 +93,10 @@
 ; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i64
 ; ARM32-LABEL: doubleToUnsigned64
 ; TODO(jpp): implement this test.
+; MIPS32-LABEL: doubleToUnsigned64
+; MIPS32: jal __Sz_fptoui_f64_i64
+; MIPS32O2-LABEL: doubleToUnsigned64
+; MIPS32O2: jal __Sz_fptoui_f64_i64
 
 define internal i64 @floatToUnsigned64(float %a) {
 entry:
@@ -86,6 +107,10 @@
 ; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i64
 ; ARM32-LABEL: floatToUnsigned64
 ; TODO(jpp): implement this test.
+; MIPS32-LABEL: floatToUnsigned64
+; MIPS32: jal __Sz_fptoui_f32_i64
+; MIPS32O2-LABEL: floatToUnsigned64
+; MIPS32O2: jal __Sz_fptoui_f32_i64
 
 define internal i32 @doubleToSigned32(double %a) {
 entry:
@@ -122,6 +147,11 @@
 ; ARM32-LABEL: floatToSigned32
 ; ARM32-DAG: vcvt.s32.f32 [[REG:s[0-9]+]], {{s[0-9]+}}
 ; ARM32-DAG: vmov {{r[0-9]+}}, [[REG]]
+; MIPS32-LABEL: floatToSigned32
+; MIPS32: trunc.w.s $f{{.*}}, $f{{.*}}
+; MIPS32O2-LABEL: floatToSigned32
+; MIPS32O2: trunc.w.s $[[REG:f[0-9]+]], $f{{.*}}
+; MIPS32O2: mfc1 $v0, $[[REG]]
 
 define internal i32 @doubleToUnsigned32(double %a) {
 entry:
@@ -382,6 +412,11 @@
 ; ARM32-LABEL: signed32ToFloat
 ; ARM32-DAG: vmov [[SRC:s[0-9]+]], {{r[0-9]+}}
 ; ARM32-DAG: vcvt.f32.s32 {{s[0-9]+}}, [[SRC]]
+; MIPS32-LABEL: signed32ToFloat
+; MIPS32: cvt.s.w $f{{.*}}, $f{{.*}}
+; MIPS32O2-LABEL: signed32ToFloat
+; MIPS32O2: mtc1 $a0, $[[REG:f[0-9]+]]
+; MIPS32O2: cvt.s.w $f{{.*}}, $[[REG]]
 
 define internal double @unsigned32ToDouble(i32 %a) {
 entry:
diff --git a/tests_lit/llvm2ice_tests/fp.load_store.ll b/tests_lit/llvm2ice_tests/fp.load_store.ll
index 63013dd..0608d8d 100644
--- a/tests_lit/llvm2ice_tests/fp.load_store.ll
+++ b/tests_lit/llvm2ice_tests/fp.load_store.ll
@@ -13,6 +13,13 @@
 ; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix MIPS32 %s
 
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target mips32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   -allow-externally-defined-symbols \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32O2 %s
+
 define internal float @loadFloat(i32 %a) {
 entry:
   %__1 = inttoptr i32 %a to float*
@@ -25,6 +32,8 @@
 
 ; MIPS32-LABEL: loadFloat
 ; MIPS32: lwc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: loadFloat
+; MIPS32O2: lwc1 $f{{.*}},0{{.*}}
 
 define internal double @loadDouble(i32 %a) {
 entry:
@@ -38,6 +47,8 @@
 
 ; MIPS32-LABEL: loadDouble
 ; MIPS32: ldc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: loadDouble
+; MIPS32O2: ldc1 $f{{.*}},0{{.*}}
 
 define internal void @storeFloat(i32 %a, float %value) {
 entry:
@@ -51,6 +62,9 @@
 
 ; MIPS32-LABEL: storeFloat
 ; MIPS32: swc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: storeFloat
+; MIPS32O2: mtc1 a1,$f{{.*}}
+; MIPS32O2: swc1 $f{{.*}},0(a0)
 
 define internal void @storeDouble(i32 %a, double %value) {
 entry:
@@ -65,6 +79,10 @@
 ; MIPS32-LABEL: storeDouble
 ; MIPS32: ldc1 $f{{.*}},4{{.*}}
 ; MIPS32: sdc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: storeDouble
+; MIPS32O2: mtc1 a3,$f{{.*}}
+; MIPS32O2: mtc1 a2,$f{{.*}}
+; MIPS32O2: sdc1 $f{{.*}},0(a0)
 
 define internal void @storeFloatConst(i32 %a) {
 entry:
@@ -80,6 +98,10 @@
 ; MIPS32: lui {{.*}},{{.*}}
 ; MIPS32: lwc1 $f{{.*}},{{.*}}
 ; MIPS32: swc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: storeFloatConst
+; MIPS32O2: lui {{.*}},{{.*}}
+; MIPS32O2: lwc1 $f{{.*}},{{.*}}
+; MIPS32O2: swc1 $f{{.*}},0{{.*}}
 
 define internal void @storeDoubleConst(i32 %a) {
 entry:
@@ -95,3 +117,7 @@
 ; MIPS32: lui {{.*}},{{.*}}
 ; MIPS32: ldc1 $f{{.*}},{{.*}}
 ; MIPS32: sdc1 $f{{.*}},0{{.*}}
+; MIPS32O2-LABEL: storeDoubleConst
+; MIPS32O2: lui {{.*}},{{.*}}
+; MIPS32O2: ldc1 $f{{.*}},{{.*}}
+; MIPS32O2: sdc1 $f{{.*}},0{{.*}}