Make X86-64 in the Large model always emit 64-bit calls.
The large code model is documented at
http://www.x86-64.org/documentation/abi.pdf, which says that calls should
assume their target may not be reachable with the 32-bit pc-relative
offset that fits in the call instruction.
To do this, we turn off the global-address->target-global-address
conversion in X86TargetLowering::LowerCall(). The first attempt at
this broke the lazy JIT because the movabs(imm->reg) can end up
separated from the actual call instruction. The lazy JIT receives the
address of the movabs as a relocation and needs to record the return
address from the call; then, when that call happens, it needs to patch
the movabs with the newly-compiled target. We could thread the call
instruction into the relocation and record the movabs<->call mapping
explicitly, but that seems to require at least as much new
complication in the code generator as this change.
To fix this, we make lazy functions _always_ go through a call
stub. You'd think we'd only have to force lazy calls through a stub on
difficult platforms, but that turns out to break indirect calls
through a function pointer. The right fix for that is to distinguish
between calls and address-of operations on uncompiled functions, but
that's complex enough to leave for someone else to do.
Another attempt at this defined a new CALL64i pseudo-instruction,
which expanded to a 2-instruction sequence in the assembly output and
was special-cased in the X86CodeEmitter's emitInstruction()
function. That broke indirect calls in the same way as above.
This patch also removes a hack forcing Darwin to the small code model.
Without far-call-stubs, the small code model requires things of the
JITMemoryManager that the DefaultJITMemoryManager can't provide.
Thanks to echristo for lots of testing!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88984 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ab49b9e..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool MayNeedFarStub = false, bool Indirect = false);
+ bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
- bool MayNeedFarStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, MayNeedFarStub)
+ GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, MayNeedFarStub);
+ GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool MayNeedFarStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, MayNeedFarStub, Indirect);
+ Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@
}
if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool MayNeedFarStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, MayNeedFarStub);
+ MO.getOffset(), 0);
break;
}
@@ -681,10 +673,9 @@
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -790,10 +781,9 @@
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -831,10 +821,9 @@
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
- bool MayNeedFarStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- MayNeedFarStub, Indirect);
+ Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dacb2c3..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1937,9 +1937,19 @@
FPDiff, dl);
}
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool WasGlobalOrExternal = false;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@@ -1967,6 +1977,7 @@
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1984,7 +1995,9 @@
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (isTailCall) {
+ }
+
+ if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
- if (!isStub)
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 5d58a87..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -185,14 +185,8 @@
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86CodeEmitterPass(*this, MCE));
@@ -211,14 +205,8 @@
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86JITCodeEmitterPass(*this, JCE));