Next PPC long double bits: ppcf128->i32 conversion.
Surprisingly complicated.
Adds getTargetNode for 2 outputs, no inputs (missing).

llvm-svn: 42822
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 20ec3c3..4a20665 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -76,6 +76,9 @@
   // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
   setConvertAction(MVT::ppcf128, MVT::f64, Expand);
   setConvertAction(MVT::ppcf128, MVT::f32, Expand);
+  // This is used in the ppcf128->int sequence.  Note it has different semantics
+  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
+  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 
   // PowerPC has no intrinsics for these particular operations
   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
@@ -2079,6 +2082,64 @@
   return Bits;
 }
 
+static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::ppcf128);
+  SDNode *Node = Op.Val;
+  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
+  assert(Node->getOperand(0).Val->getOpcode()==ISD::BUILD_PAIR);
+  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
+  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);
+
+  // This sequence changes FPSCR to do round-to-zero, adds the two halves
+  // of the long double, and puts FPSCR back the way it was.  We do not
+  // actually model FPSCR.
+  std::vector<MVT::ValueType> NodeTys;
+  SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+  NodeTys.push_back(MVT::f64);   // Return register
+  NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
+  Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
+  MFFSreg = Result.getValue(0);
+  InFlag = Result.getValue(1);
+
+  NodeTys.clear();
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = DAG.getConstant(31, MVT::i32);
+  Ops[1] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
+  InFlag = Result.getValue(0);
+
+  NodeTys.clear();
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = DAG.getConstant(30, MVT::i32);
+  Ops[1] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
+  InFlag = Result.getValue(0);
+
+  NodeTys.clear();
+  NodeTys.push_back(MVT::f64);    // result of add
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = Lo;
+  Ops[1] = Hi;
+  Ops[2] = InFlag;
+  Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
+  FPreg = Result.getValue(0);
+  InFlag = Result.getValue(1);
+
+  NodeTys.clear();
+  NodeTys.push_back(MVT::f64);
+  Ops[0] = DAG.getConstant(1, MVT::i32);
+  Ops[1] = MFFSreg;
+  Ops[2] = FPreg;
+  Ops[3] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
+  FPreg = Result.getValue(0);
+
+  // We know the low half is about to be thrown away, so just use something
+  // convenient.
+  return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg);
+}
+
 static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
   if (Op.getOperand(0).getValueType() == MVT::i64) {
     SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
@@ -2935,6 +2996,7 @@
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
+  case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);
 
   // Lower 64-bit shifts.
   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 33f6381..01a35a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -129,7 +129,28 @@
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
-      LBRX
+      LBRX,
+
+      // The following 5 instructions are used only as part of the
+      // long double-to-int conversion sequence.
+
+      /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+      /// register.
+      MFFS,
+
+      /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+      MTFSB0,
+
+      /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+      MTFSB1,
+
+      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+      /// rounding towards zero.  It has flags added so it won't move past the 
+      /// FPSCR-setting instructions.
+      FADDRTZ,
+
+      /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+      MTFSF
     };
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 865320c..98e1e35 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -315,6 +315,34 @@
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
 }
 
+// This is used for MFFS, MTFSB0, MTFSB1.  42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let Pattern = pattern;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = RST;
+  let Inst{11-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let Pattern = pattern;
+  bits<5> FM;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = FM;
+  let Inst{11-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
 // DCB_Form - Form X instruction, used for dcb* instructions.
 class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, 
                       InstrItinClass itin, list<dag> pattern>
@@ -513,6 +541,27 @@
   let SPR = spr;
 }
 
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
+                      string cstr, InstrItinClass itin, list<dag>pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<8> FM;
+  bits<5> RT;
+
+  bit RC = 0;    // set by isDOT
+  let Pattern = pattern;
+  let Constraints = cstr;
+
+  let Inst{6} = 0;
+  let Inst{7-14}  = FM;
+  let Inst{15} = 0;
+  let Inst{16-20} = RT;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
 // 1.7.10 XS-Form - SRADI.
 class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1591148..f62f7cb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -53,6 +53,21 @@
 def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
 def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>;
 
+// This sequence is used for long double->int conversions.  It changes the
+// bits in the FPSCR which is not modelled.  
+def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+                        [SDNPOutFlag]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInFlag, SDNPOutFlag]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+                       [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
+                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+                        SDTCisVT<3, f64>]>,
+                       [SDNPInFlag]>;
+
 def PPCfsel   : SDNode<"PPCISD::FSEL",  
    // Type constraint for fsel.
    SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, 
@@ -873,6 +888,37 @@
                        "mfcr $rT, $FXM", SprMFCR>,
             PPC970_DGroup_First, PPC970_Unit_CRU;
 
+// Instructions to manipulate FPSCR.  Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+def MFFS   : XForm_42<63, 583, (outs F8RC:$rT), (ins), 
+                       "mffs $rT", IntMFFS,
+                       [(set F8RC:$rT, (PPCmffs))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+                       "mtfsb0 $FM", IntMTFSB0,
+                      [(PPCmtfsb0 (i32 imm:$FM))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+                       "mtfsb1 $FM", IntMTFSB0,
+                      [(PPCmtfsb1 (i32 imm:$FM))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def FADDrtz: AForm_2<63, 21,
+                    (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                    "fadd $FRT, $FRA, $FRB", FPGeneral,
+                    [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+// MTFSF does not actually produce an FP result.  We pretend it copies
+// input reg B to the output.  If we didn't do this it would look like the
+// instruction had no outputs (because we aren't modelling the FPSCR) and
+// it would be deleted.
+def MTFSF  : XFLForm<63, 711, (outs F8RC:$FRA),
+                              (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+                       "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+                       [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), 
+                                                   F8RC:$rT, F8RC:$FRB))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+
 let PPC970_Unit = 1 in {  // FXU Operations.
 
 // XO-Form instructions.  Arithmetic instructions that can set overflow bit