We are missing opportunites to use ldm. Take code like this:

void t(int *cp0, int *cp1, int *dp, int fmd) {
  int c0, c1, d0, d1, d2, d3;
  c0 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000);
  c1 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000);
  /* ... */
}

It code gens into something pretty bad. But with this change (analogous to the
X86 back-end), it will use ldm and generate few instructions.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106693 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8d3ad80..b9bc0b8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1306,6 +1306,107 @@
   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                               int64_t &Offset1,
+                                               int64_t &Offset2) const {
+  // Don't worry about Thumb: just ARM and Thumb2.
+  if (Subtarget.isThumb1Only()) return false;
+
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+    return false;
+
+  switch (Load1->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  switch (Load2->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  // Check if base addresses and chain operands match.
+  if (Load1->getOperand(0) != Load2->getOperand(0) ||
+      Load1->getOperand(4) != Load2->getOperand(4))
+    return false;
+
+  // Index should be Reg0.
+  if (Load1->getOperand(3) != Load2->getOperand(3))
+    return false;
+
+  // Determine the offsets.
+  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+      isa<ConstantSDNode>(Load2->getOperand(1))) {
+    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                               int64_t Offset1, int64_t Offset2,
+                                               unsigned NumLoads) const {
+  // Don't worry about Thumb: just ARM and Thumb2.
+  if (Subtarget.isThumb1Only()) return false;
+
+  assert(Offset2 > Offset1);
+
+  if ((Offset2 - Offset1) / 8 > 64)
+    return false;
+
+  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+    return false;  // FIXME: overly conservative?
+
+  // Four loads in a row should be sufficient.
+  if (NumLoads >= 3)
+    return false;
+
+  return true;
+}
+
 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                             const MachineBasicBlock *MBB,
                                             const MachineFunction &MF) const {