Subzero: Implement 64-bit multiply in MIPS32
Implement 64-bit multiply in MIPS32 and, in addition, add the LO/HI registers, which are also used for other 64-bit math such as div and rem.
BUG=
R=jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1716483003 .
Patch from Reed Kotler <rkotlerimgtec@gmail.com>.
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index f9ee059..4fcd3ce 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -582,6 +582,7 @@
case InstArithmetic::Or:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
+ case InstArithmetic::Mul:
break;
default:
UnimplementedLoweringError(this, Instr);
@@ -644,6 +645,24 @@
_mov(DestHi, T_Hi);
return;
}
+ case InstArithmetic::Mul: {
+ // TODO(rkotler): Make sure that mul has the side effect of clobbering
+ // LO, HI. Check for any other LO, HI quirkiness in this section.
+ auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
+ auto *T1 = I32Reg(), *T2 = I32Reg();
+ auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
+ _multu(T_Lo, Src0LoR, Src1LoR);
+ Context.insert<InstFakeDef>(T_Hi, T_Lo);
+ _mflo(T1, T_Lo);
+ _mfhi(T2, T_Hi);
+ _mov(DestLo, T1);
+ _mul(TM1, Src0HiR, Src1LoR);
+ _mul(TM2, Src0LoR, Src1HiR);
+ _addu(TM3, TM1, T2);
+ _addu(TM4, TM3, TM2);
+ _mov(DestHi, TM4);
+ return;
+ }
default:
UnimplementedLoweringError(this, Instr);
return;