Fix preload instruction isel. Only v7 supports pli, and only v7 with mp extension supports pldw. Add subtarget attribute to denote mp extension support and legalize illegal ones to nothing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118160 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 0ebdd75..eef152f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -64,6 +64,9 @@
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+                                 "Supports Multiprocessing extension">;
 
 // ARM architectures.
 def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b816e66..6bd8503 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -598,10 +598,7 @@
   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 
-  // ARM v5TE+ and Thumb2 has preload instructions.
-  if (Subtarget->isThumb2() ||
-      (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))
-    setOperationAction(ISD::PREFETCH,   MVT::Other, Legal);
+    setOperationAction(ISD::PREFETCH,   MVT::Other, Custom);
 
   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
   if (!Subtarget->hasV6Ops()) {
@@ -777,6 +774,8 @@
   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
 
+  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
+
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   case ARMISD::VCGE:          return "ARMISD::VCGE";
   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
@@ -2060,6 +2059,31 @@
                      DAG.getConstant(DMBOpt, MVT::i32));
 }
 
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+                             const ARMSubtarget *Subtarget) {
+  // ARM pre v5TE and Thumb1 does not have preload instructions.
+  if (!(Subtarget->isThumb2() ||
+        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+    // Just preserve the chain.
+    return Op.getOperand(0);
+
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Flavor = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+  if (Flavor != 3) {
+    if (!Subtarget->hasV7Ops())
+      return Op.getOperand(0);
+    else if (Flavor == 2 && !Subtarget->hasMPExtension())
+      return Op.getOperand(0);
+  }
+
+  if (Subtarget->isThumb())
+    // Invert the bits.
+    Flavor = ~Flavor & 0x3;
+
+  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+                     Op.getOperand(1), DAG.getConstant(Flavor, MVT::i32));
+}
+
 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
@@ -3842,6 +3866,7 @@
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
+  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index bd2fa8e..8504b83 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -83,6 +83,8 @@
 
       MEMBARRIER,   // Memory barrier (DMB)
       MEMBARRIER_MCR, // Memory barrier (MCR)
+
+      PRELOAD,      // Preload
       
       VCEQ,         // Vector compare equal.
       VCGE,         // Vector compare greater than or equal.
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 71557c9..c98d45b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,6 +62,8 @@
 
 def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_ARMPRELOAD : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
 def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@@ -130,6 +132,8 @@
                                [SDNPHasChain]>;
 def ARMMemBarrierMCR  : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                                [SDNPHasChain]>;
+def ARMPreload        : SDNode<"ARMISD::PRELOAD", SDT_ARMPRELOAD,
+                               [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
 
 def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
 
@@ -159,6 +163,8 @@
                                  AssemblerPredicate;
 def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
                                  AssemblerPredicate;
+def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
+                                 AssemblerPredicate;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
 def IsThumb          : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
@@ -988,14 +994,11 @@
 
 // Preload signals the memory system of possible future data/instruction access.
 // These are for disassembly only.
-//
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
 multiclass APreLoad<bits<2> data_read, string opc> {
 
-  def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary,
+  def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
                 !strconcat(opc, "\t$addr"),
-                [(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> {
+                [(ARMPreload addrmode_imm12:$addr, (i32 data_read))]> {
     bits<4> Rt;
     bits<17> addr;
     let Inst{31-26} = 0b111101;
@@ -1009,9 +1012,9 @@
     let Inst{11-0}  = addr{11-0};   // imm12
   }
 
-  def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary,
+  def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
                !strconcat(opc, "\t$shift"),
-               [(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> {
+               [(ARMPreload ldst_so_reg:$shift, (i32 data_read))]> {
     bits<4> Rt;
     bits<17> shift;
     let Inst{31-26} = 0b111101;
@@ -1025,9 +1028,9 @@
   }
 }
 
-defm PLD  : APreLoad<3, "pld">;
-defm PLDW : APreLoad<2, "pldw">;
-defm PLI  : APreLoad<1, "pli">;
+defm PLD  : APreLoad<3, "pld">,  Requires<[IsARM]>;
+defm PLDW : APreLoad<2, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI  : APreLoad<1, "pli">,  Requires<[IsARM,HasV7]>;
 
 def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
                  "setend\t$end",
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 4472411..b105bc7 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1171,67 +1171,66 @@
 
 // T2Ipl (Preload Data/Instruction) signals the memory system of possible future
 // data/instruction access.  These are for disassembly only.
-//
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass T2Ipl<bit instr, bit write, bits<2> data_read, string opc> {
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2),  (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<2> instr_write, string opc> {
 
-  def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc,
+  def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
                 "\t$addr",
-                [(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> {
+                [(ARMPreload t2addrmode_imm12:$addr, (i32 instr_write))]> {
     let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
+    let Inst{24} = instr_write{1};
     let Inst{23} = 1; // U = 1
     let Inst{22} = 0;
-    let Inst{21} = write;
+    let Inst{21} = instr_write{0};
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
   }
 
-  def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc,
+  def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
                 "\t$addr",
-               [(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> {
+               [(ARMPreload t2addrmode_imm8:$addr, (i32 instr_write))]> {
     let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
+    let Inst{24} = instr_write{1};
     let Inst{23} = 0; // U = 0
     let Inst{22} = 0;
-    let Inst{21} = write;
+    let Inst{21} = instr_write{0};
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
     let Inst{11-8} = 0b1100;
   }
 
-  def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc,
+  def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
                "\t$addr",
-              [(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> {
+              [(ARMPreload t2addrmode_so_reg:$addr, (i32 instr_write))]> {
     let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
+    let Inst{24} = instr_write{1};
     let Inst{23} = 0; // add = TRUE for T1
     let Inst{22} = 0;
-    let Inst{21} = write;
+    let Inst{21} = instr_write{0};
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
     let Inst{11-6} = 0000000;
   }
 
   let isCodeGenOnly = 1 in
-  def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc,
+  def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_Preload, opc,
                 "\t$addr",
                []> {
     let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
+    let Inst{24} = instr_write{1};
     let Inst{23} = ?; // add = (U == 1)
     let Inst{22} = 0;
-    let Inst{21} = write;
+    let Inst{21} = instr_write{0};
     let Inst{20} = 1;
     let Inst{19-16} = 0b1111; // Rn = 0b1111
     let Inst{15-12} = 0b1111;
   }
 }
 
-defm t2PLD  : T2Ipl<0, 0, 3, "pld">;
-defm t2PLDW : T2Ipl<0, 1, 2, "pldw">;
-defm t2PLI  : T2Ipl<1, 0, 1, "pli">;
+defm t2PLD  : T2Ipl<0, "pld">,  Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI  : T2Ipl<2, "pli">,  Requires<[IsThumb2,HasV7]>;
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index c35cadb..1739618 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -90,6 +90,7 @@
 def IIC_iStore_d_ru  : InstrItinClass;
 def IIC_iStore_m   : InstrItinClass<0>;  // micro-coded
 def IIC_iStore_mu  : InstrItinClass<0>;  // micro-coded
+def IIC_Preload    : InstrItinClass;
 def IIC_Br         : InstrItinClass;
 def IIC_fpSTAT     : InstrItinClass;
 def IIC_fpUNA32    : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 94b22c9..fc62faa 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -225,6 +225,10 @@
   InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
                                 InstrStage<2, [A8_LSPipe]>], [2]>,
 
+  //
+  // Preload
+  InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index d5ab5dc..bc325b1 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -402,6 +402,10 @@
                                 InstrStage<1, [A9_AGU], 0>,
                                 InstrStage<2, [A9_LSUnit]>], [2]>,
 
+  //
+  // Preload
+  InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 787bc30..0833172 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -51,6 +51,7 @@
   , HasT2ExtractPack(false)
   , HasDataBarrier(false)
   , Pref32BitThumb(false)
+  , HasMPExtension(false)
   , FPOnlySP(false)
   , AllowsUnalignedMem(false)
   , stackAlignment(4)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index ca9921e..551b2f9 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -106,6 +106,10 @@
   /// over 16-bit ones.
   bool Pref32BitThumb;
 
+  /// HasMPExtension - True if the subtarget supports Multiprocessing
+  /// extension (ARMv7 only).
+  bool HasMPExtension;
+
   /// FPOnlySP - If true, the floating point unit only supports single
   /// precision.
   bool FPOnlySP;
@@ -176,6 +180,7 @@
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
+  bool hasMPExtension() const { return HasMPExtension; }
 
   bool hasFP16() const { return HasFP16; }
   bool hasD16() const { return HasD16; }