For the current Atom processor, the fastest way to handle a call
indirect through a memory address is to load the memory address into
a register and then call indirect through the register.

This patch implements this improvement by modifying SelectionDAG to
force a function address that is a memory reference to be loaded
into a virtual register first.

Patch by Sriram Murali.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178171 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index e87da56..bf09501 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -134,6 +134,9 @@
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+                                     "CallRegIndirect", "true",
+                                     "Call register indirect">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -181,7 +184,9 @@
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide, FeaturePadShortFunctions]>;
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeaturePadShortFunctions]>;
 
 // "Arrandale" along with corei3 and corei5
 def : ProcessorModel<"corei7", SandyBridgeModel,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ef6a3b..0eaab0f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2629,6 +2629,19 @@
     InFlag = Chain.getValue(1);
   }
 
+  // Use an indirect call through a register when the CALL target is a memory operand.
+  if (Subtarget->callRegIndirect() &&
+      Callee.getOpcode() == ISD::LOAD) {
+    const TargetRegisterClass *AddrRegClass =
+      getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    unsigned VReg = MRI.createVirtualRegister(AddrRegClass);
+    SDValue tempValue = DAG.getCopyFromReg(Callee,
+                                           dl, VReg, Callee.getValueType());
+    Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
   Ops.push_back(Chain);
   Ops.push_back(Callee);
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 4b368b4..6e66c1a 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -458,6 +458,7 @@
   HasSlowDivide = false;
   PostRAScheduler = false;
   PadShortFunctions = false;
+  CallRegIndirect = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 108ef0e..cac3f57 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -159,6 +159,10 @@
   /// a stall when returning too early.
   bool PadShortFunctions;
 
+  /// CallRegIndirect - True if calls with a memory reference should be
+  /// converted to a register-based indirect call.
+  bool CallRegIndirect;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -269,6 +273,7 @@
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide() const { return HasSlowDivide; }
   bool padShortFunctions() const { return PadShortFunctions; }
+  bool callRegIndirect() const { return CallRegIndirect; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }