Make X86-64 in the Large model always emit 64-bit calls.
The large code model is documented at
http://www.x86-64.org/documentation/abi.pdf and says that calls should
assume their target doesn't live within the 32-bit pc-relative offset
that fits in the call instruction.

To do this, we turn off the global-address->target-global-address
conversion in X86TargetLowering::LowerCall(). The first attempt at
this broke the lazy JIT because it can separate the movabs(imm->reg)
from the actual call instruction. The lazy JIT receives the address of
the movabs as a relocation and needs to record the return address from
the call; and then when that call happens, it needs to patch the
movabs with the newly-compiled target. We could thread the call
instruction into the relocation and record the movabs<->call mapping
explicitly, but that seems to require at least as much new
complication in the code generator as this change.

To fix this, we make lazy functions _always_ go through a call
stub. You'd think we'd only have to force lazy calls through a stub on
difficult platforms, but that turns out to break indirect calls
through a function pointer. The right fix for that is to distinguish
between calls and address-of operations on uncompiled functions, but
that's complex enough to leave for someone else to do.

Another attempt at this defined a new CALL64i pseudo-instruction,
which expanded to a 2-instruction sequence in the assembly output and
was special-cased in the X86CodeEmitter's emitInstruction()
function. That broke indirect calls in the same way as above.

This patch also removes a hack forcing Darwin to the small code model.
Without far-call-stubs, the small code model requires things of the
JITMemoryManager that the DefaultJITMemoryManager can't provide.

Thanks to echristo for lots of testing!



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88984 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ab49b9e..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@
     void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
     void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
                            intptr_t Disp = 0, intptr_t PCAdj = 0,
-                           bool MayNeedFarStub = false, bool Indirect = false);
+                           bool Indirect = false);
     void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
     void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
                               intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@
 void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
                                 intptr_t Disp /* = 0 */,
                                 intptr_t PCAdj /* = 0 */,
-                                bool MayNeedFarStub /* = false */,
                                 bool Indirect /* = false */) {
   intptr_t RelocCST = Disp;
   if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@
     RelocCST = PCAdj;
   MachineRelocation MR = Indirect
     ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
-                                           GV, RelocCST, MayNeedFarStub)
+                                           GV, RelocCST, false)
     : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
-                               GV, RelocCST, MayNeedFarStub);
+                               GV, RelocCST, false);
   MCE.addRelocation(MR);
   // The relocated value will be added to the displacement
   if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@
     // do it, otherwise fallback to absolute (this is determined by IsPCRel). 
     //  89 05 00 00 00 00     mov    %eax,0(%rip)  # PC-relative
     //  89 04 25 00 00 00 00  mov    %eax,0x0      # Absolute
-    bool MayNeedFarStub = isa<Function>(RelocOp->getGlobal());
     bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
     emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
-                      Adj, MayNeedFarStub, Indirect);
+                      Adj, Indirect);
   } else if (RelocOp->isSymbol()) {
     emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
   } else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@
     }
     
     if (MO.isGlobal()) {
-      // Assume undefined functions may be outside the Small codespace.
-      bool MayNeedFarStub = 
-        (Is64BitMode && 
-            (TM.getCodeModel() == CodeModel::Large ||
-             TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
-        Opcode == X86::TAILJMPd;
       emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
-                        MO.getOffset(), 0, MayNeedFarStub);
+                        MO.getOffset(), 0);
       break;
     }
     
@@ -681,10 +673,9 @@
     if (Opcode == X86::MOV64ri)
       rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
     if (MO1.isGlobal()) {
-      bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
       bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
       emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
-                        MayNeedFarStub, Indirect);
+                        Indirect);
     } else if (MO1.isSymbol())
       emitExternalSymbolAddress(MO1.getSymbolName(), rt);
     else if (MO1.isCPI())
@@ -790,10 +781,9 @@
     if (Opcode == X86::MOV64ri32)
       rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
     if (MO1.isGlobal()) {
-      bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
       bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
       emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
-                        MayNeedFarStub, Indirect);
+                        Indirect);
     } else if (MO1.isSymbol())
       emitExternalSymbolAddress(MO1.getSymbolName(), rt);
     else if (MO1.isCPI())
@@ -831,10 +821,9 @@
     if (Opcode == X86::MOV64mi32)
       rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
     if (MO.isGlobal()) {
-      bool MayNeedFarStub = isa<Function>(MO.getGlobal());
       bool Indirect = gvNeedsNonLazyPtr(MO, TM);
       emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
-                        MayNeedFarStub, Indirect);
+                        Indirect);
     } else if (MO.isSymbol())
       emitExternalSymbolAddress(MO.getSymbolName(), rt);
     else if (MO.isCPI())
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dacb2c3..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1937,9 +1937,19 @@
                                      FPDiff, dl);
   }
 
-  // If the callee is a GlobalAddress node (quite common, every direct call is)
-  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+  bool WasGlobalOrExternal = false;
+  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+    // In the 64-bit large code model, we have to make all calls
+    // through a register, since the call instruction's 32-bit
+    // pc-relative offset may not be large enough to hold the whole
+    // address.
+  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    WasGlobalOrExternal = true;
+    // If the callee is a GlobalAddress node (quite common, every direct call
+    // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+    // it.
+
     // We should use extra load for direct calls to dllimported functions in
     // non-JIT mode.
     GlobalValue *GV = G->getGlobal();
@@ -1967,6 +1977,7 @@
                                           G->getOffset(), OpFlags);
     }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    WasGlobalOrExternal = true;
     unsigned char OpFlags = 0;
 
     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1984,7 +1995,9 @@
 
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
                                          OpFlags);
-  } else if (isTailCall) {
+  }
+
+  if (isTailCall && !WasGlobalOrExternal) {
     unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
 
     Chain = DAG.getCopyToReg(Chain,  dl,
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@
   // Rewrite the call target... so that we don't end up here every time we
   // execute the call.
 #if defined (X86_64_JIT)
-  if (!isStub)
-    *(intptr_t *)(RetAddr - 0xa) = NewVal;
+  assert(isStub &&
+         "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+         " the call instruction varies too much.");
 #else
   *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
 #endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 5d58a87..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -185,14 +185,8 @@
   }
   
   // 64-bit JIT places everything in the same buffer except external functions.
-  // On Darwin, use small code model but hack the call instruction for 
-  // externals.  Elsewhere, do not assume globals are in the lower 4G.
-  if (Subtarget.is64Bit()) {
-    if (Subtarget.isTargetDarwin())
-      setCodeModel(CodeModel::Small);
-    else
+  if (Subtarget.is64Bit())
       setCodeModel(CodeModel::Large);
-  }
 
   PM.add(createX86CodeEmitterPass(*this, MCE));
 
@@ -211,14 +205,8 @@
   }
   
   // 64-bit JIT places everything in the same buffer except external functions.
-  // On Darwin, use small code model but hack the call instruction for 
-  // externals.  Elsewhere, do not assume globals are in the lower 4G.
-  if (Subtarget.is64Bit()) {
-    if (Subtarget.isTargetDarwin())
-      setCodeModel(CodeModel::Small);
-    else
+  if (Subtarget.is64Bit())
       setCodeModel(CodeModel::Large);
-  }
 
   PM.add(createX86JITCodeEmitterPass(*this, JCE));