Merge lowerCall and lowerRet between x86 and x64
BUG=
R=jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1592033002 .
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index bff2066..c154901 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -153,259 +153,46 @@
_jmp(JumpTarget);
}
-void TargetX8632::lowerCall(const InstCall *Instr) {
- // x86-32 calling convention:
- //
- // * At the point before the call, the stack must be aligned to 16 bytes.
- //
- // * The first four arguments of vector type, regardless of their position
- // relative to the other arguments in the argument list, are placed in
- // registers xmm0 - xmm3.
- //
- // * Other arguments are pushed onto the stack in right-to-left order, such
- // that the left-most argument ends up on the top of the stack at the lowest
- // memory address.
- //
- // * Stack arguments of vector type are aligned to start at the next highest
- // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes.
- //
- // This intends to match the section "IA-32 Function Calling Convention" of
- // the document "OS X ABI Function Call Guide" by Apple.
- NeedsStackAlignment = true;
-
- OperandList XmmArgs;
- OperandList StackArgs, StackArgLocations;
- int32_t ParameterAreaSizeBytes = 0;
-
- // Classify each argument operand according to the location where the
- // argument is passed.
- for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
- Operand *Arg = Instr->getArg(i);
- Type Ty = Arg->getType();
- // The PNaCl ABI requires the width of arguments to be at least 32 bits.
- assert(typeWidthInBytes(Ty) >= 4);
- if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
- XmmArgs.push_back(Arg);
+Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
+ std::unique_ptr<AutoBundle> Bundle;
+ if (NeedSandboxing) {
+ if (llvm::isa<Constant>(CallTarget)) {
+ Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
} else {
- StackArgs.push_back(Arg);
- if (isVectorType(Arg->getType())) {
- ParameterAreaSizeBytes =
- Traits::applyStackAlignment(ParameterAreaSizeBytes);
- }
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
- auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc);
- StackArgLocations.push_back(Mem);
- ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+ Variable *CallTargetVar = nullptr;
+ _mov(CallTargetVar, CallTarget);
+ Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
+ const SizeT BundleSize =
+ 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+ _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
+ CallTarget = CallTargetVar;
}
}
- // Ensure there is enough space for the fstp/movs for floating returns.
- Variable *Dest = Instr->getDest();
- if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
- ParameterAreaSizeBytes =
- std::max(static_cast<size_t>(ParameterAreaSizeBytes),
- typeWidthInBytesOnStack(Dest->getType()));
- }
-
- // Adjust the parameter area so that the stack is aligned. It is assumed that
- // the stack is already aligned at the start of the calling sequence.
- ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
- assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
- maxOutArgsSizeBytes());
-
- // Copy arguments that are passed on the stack to the appropriate stack
- // locations.
- for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
- lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
- }
-
- // Copy arguments to be passed in registers to the appropriate registers.
- // TODO: Investigate the impact of lowering arguments passed in registers
- // after lowering stack arguments as opposed to the other way around.
- // Lowering register arguments after stack arguments may reduce register
- // pressure. On the other hand, lowering register arguments first (before
- // stack arguments) may result in more compact code, as the memory operand
- // displacements may end up being smaller before any stack adjustment is
- // done.
- for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
- Variable *Reg =
- legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
- // Generate a FakeUse of register arguments so that they do not get dead
- // code eliminated as a result of the FakeKill of scratch registers after
- // the call.
- Context.insert<InstFakeUse>(Reg);
- }
- // Generate the call instruction. Assign its result to a temporary with high
- // register allocation weight.
- // ReturnReg doubles as ReturnRegLo as necessary.
- Variable *ReturnReg = nullptr;
- Variable *ReturnRegHi = nullptr;
- if (Dest) {
- const Type DestTy = Dest->getType();
- switch (DestTy) {
- case IceType_NUM:
- case IceType_void:
- case IceType_i1:
- case IceType_i8:
- case IceType_i16:
- llvm::report_fatal_error("Invalid Call dest type");
- break;
- case IceType_i32:
- ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
- break;
- case IceType_i64:
- ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
- break;
- case IceType_f32:
- case IceType_f64:
- // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the
- // fstp instruction.
- break;
- case IceType_v4i1:
- case IceType_v8i1:
- case IceType_v16i1:
- case IceType_v16i8:
- case IceType_v8i16:
- case IceType_v4i32:
- case IceType_v4f32:
- ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
- break;
- }
- }
-
- Operand *CallTarget =
- legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
-
- Traits::Insts::Call *NewCall;
- /* AutoBundle scoping */ {
- std::unique_ptr<AutoBundle> Bundle;
- if (NeedSandboxing) {
- if (llvm::isa<Constant>(CallTarget)) {
- Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
- } else {
- Variable *CallTargetVar = nullptr;
- _mov(CallTargetVar, CallTarget);
- Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
- const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
- _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
- CallTarget = CallTargetVar;
- }
- }
- NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
- }
-
- if (ReturnRegHi)
- Context.insert<InstFakeDef>(ReturnRegHi);
-
- // Insert a register-kill pseudo instruction.
- Context.insert<InstFakeKill>(NewCall);
-
- if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
- // Special treatment for an FP function which returns its result in st(0).
- // If Dest ends up being a physical xmm register, the fstp emit code will
- // route st(0) through the space reserved in the function argument area
- // we allocated.
- _fstp(Dest);
- // Create a fake use of Dest in case it actually isn't used, because st(0)
- // still needs to be popped.
- Context.insert<InstFakeUse>(Dest);
- }
-
- // Generate a FakeUse to keep the call live if necessary.
- if (Instr->hasSideEffects() && ReturnReg) {
- Context.insert<InstFakeUse>(ReturnReg);
- }
-
- if (!Dest)
- return;
-
- // Assign the result of the call to Dest.
- if (ReturnReg) {
- if (ReturnRegHi) {
- auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
- Variable *DestLo = Dest64On32->getLo();
- Variable *DestHi = Dest64On32->getHi();
- _mov(DestLo, ReturnReg);
- _mov(DestHi, ReturnRegHi);
- } else {
- const Type DestTy = Dest->getType();
- assert(DestTy == IceType_i32 || DestTy == IceType_i16 ||
- DestTy == IceType_i8 || DestTy == IceType_i1 ||
- isVectorType(DestTy));
- if (isVectorType(DestTy)) {
- _movp(Dest, ReturnReg);
- } else {
- _mov(Dest, ReturnReg);
- }
- }
- }
+ return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
}
-void TargetX8632::lowerArguments() {
- VarList &Args = Func->getArgs();
- // The first four arguments of vector type, regardless of their position
- // relative to the other arguments in the argument list, are passed in
- // registers xmm0 - xmm3.
- unsigned NumXmmArgs = 0;
-
- Context.init(Func->getEntryNode());
- Context.setInsertPoint(Context.getCur());
-
- for (SizeT I = 0, E = Args.size();
- I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
- Variable *Arg = Args[I];
- Type Ty = Arg->getType();
- if (!isVectorType(Ty))
- continue;
- // Replace Arg in the argument list with the home register. Then generate
- // an instruction in the prolog to copy the home register to the assigned
- // location of Arg.
- int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
- ++NumXmmArgs;
- Variable *RegisterArg = Func->makeVariable(Ty);
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[I] = RegisterArg;
- Context.insert<InstAssign>(Arg, RegisterArg);
- }
-}
-
-void TargetX8632::lowerRet(const InstRet *Inst) {
- Variable *Reg = nullptr;
- if (Inst->hasRetValue()) {
- Operand *Src0 = legalize(Inst->getRetValue());
- const Type Src0Ty = Src0->getType();
- // TODO(jpp): this is not needed.
- if (Src0Ty == IceType_i64) {
+Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,
+ Type ReturnType) {
+ if (isVectorType(ReturnType)) {
+ return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
+ } else if (isScalarFloatingType(ReturnType)) {
+ _fld(Value);
+ return nullptr;
+ } else {
+ assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
+ if (ReturnType == IceType_i64) {
Variable *eax =
- legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
+ legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax);
Variable *edx =
- legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
- Reg = eax;
+ legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx);
Context.insert<InstFakeUse>(edx);
- } else if (isScalarFloatingType(Src0Ty)) {
- _fld(Src0);
- } else if (isVectorType(Src0Ty)) {
- Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
+ return eax;
} else {
- assert(Src0Ty == IceType_i32);
- _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
+ Variable *Reg = nullptr;
+ _mov(Reg, Value, Traits::RegisterSet::Reg_eax);
+ return Reg;
}
}
- // Add a ret instruction even if sandboxing is enabled, because addEpilog
- // explicitly looks for a ret instruction as a marker for where to insert the
- // frame removal instructions.
- _ret(Reg);
- // Add a fake use of esp to make sure esp stays alive for the entire
- // function. Otherwise post-call esp adjustments get dead-code eliminated.
- keepEspLiveAtExit();
}
void TargetX8632::addProlog(CfgNode *Node) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 1f8e4a7..39ba608 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -58,9 +58,8 @@
void initSandbox() {}
void lowerIndirectJump(Variable *JumpTarget);
- void lowerCall(const InstCall *Instr) override;
- void lowerArguments() override;
- void lowerRet(const InstRet *Inst) override;
+ Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
+ Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index d804bb2..e54ffd2 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -68,10 +68,11 @@
using Cond = ::Ice::CondX86;
using RegisterSet = ::Ice::RegX8632;
- static const SizeT StackPtr = RegX8632::Reg_esp;
- static const SizeT FramePtr = RegX8632::Reg_ebp;
- static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
- static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
+ static constexpr SizeT StackPtr = RegX8632::Reg_esp;
+ static constexpr SizeT FramePtr = RegX8632::Reg_ebp;
+ static constexpr GPRRegister Encoded_Reg_Accumulator =
+ RegX8632::Encoded_Reg_eax;
+ static constexpr GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
static constexpr FixupKind FK_PcRel = llvm::ELF::R_386_PC32;
static constexpr FixupKind FK_Abs = llvm::ELF::R_386_32;
static constexpr FixupKind FK_Gotoff = llvm::ELF::R_386_GOTOFF;
@@ -658,21 +659,50 @@
llvm::report_fatal_error("no rdx in non-64-bit mode.");
}
+ // x86-32 calling convention:
+ //
+ // * The first four arguments of vector type, regardless of their position
+ // relative to the other arguments in the argument list, are placed in
+ // registers xmm0 - xmm3.
+ //
+ // This intends to match the section "IA-32 Function Calling Convention" of
+ // the document "OS X ABI Function Call Guide" by Apple.
+
/// The maximum number of arguments to pass in XMM registers
- static const uint32_t X86_MAX_XMM_ARGS = 4;
+ static constexpr uint32_t X86_MAX_XMM_ARGS = 4;
/// The maximum number of arguments to pass in GPR registers
- static const uint32_t X86_MAX_GPR_ARGS = 0;
+ static constexpr uint32_t X86_MAX_GPR_ARGS = 0;
+ /// Whether scalar floating point arguments are passed in XMM registers
+ static constexpr bool X86_PASS_SCALAR_FP_IN_XMM = false;
+ /// Get the register for a given argument slot in the XMM registers.
+ static int32_t getRegisterForXmmArgNum(uint32_t ArgNum) {
+ // TODO(sehr): Change to use the CCArg technique used in ARM32.
+ static_assert(RegisterSet::Reg_xmm0 + 1 == RegisterSet::Reg_xmm1,
+ "Inconsistency between XMM register numbers and ordinals");
+ if (ArgNum >= X86_MAX_XMM_ARGS) {
+ return Variable::NoRegister;
+ }
+ return static_cast<int32_t>(RegisterSet::Reg_xmm0 + ArgNum);
+ }
+ /// Get the register for a given argument slot in the GPRs.
+ static int32_t getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) {
+ assert(Ty == IceType_i64 || Ty == IceType_i32);
+ (void)Ty;
+ (void)ArgNum;
+ return Variable::NoRegister;
+ }
+
/// The number of bits in a byte
- static const uint32_t X86_CHAR_BIT = 8;
+ static constexpr uint32_t X86_CHAR_BIT = 8;
/// Stack alignment. This is defined in IceTargetLoweringX8632.cpp because it
/// is used as an argument to std::max(), and the default std::less<T> has an
/// operator(T const&, T const&) which requires this member to have an
/// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack
- static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+ static constexpr uint32_t X86_RET_IP_SIZE_BYTES = 4;
/// The number of different NOP instructions
- static const uint32_t X86_NUM_NOP_VARIANTS = 5;
+ static constexpr uint32_t X86_NUM_NOP_VARIANTS = 5;
/// \name Limits for unrolling memory intrinsics.
/// @{
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 89f8208..6fe92bf 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -384,209 +384,11 @@
_jmp(JumpTarget);
}
-namespace {
-static inline TargetX8664::Traits::RegisterSet::AllRegisters
-getRegisterForXmmArgNum(uint32_t ArgNum) {
- assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
- return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
- TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
-}
-
-static inline TargetX8664::Traits::RegisterSet::AllRegisters
-getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) {
- assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
- static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
- TargetX8664::Traits::RegisterSet::Reg_rdi,
- TargetX8664::Traits::RegisterSet::Reg_rsi,
- TargetX8664::Traits::RegisterSet::Reg_rdx,
- TargetX8664::Traits::RegisterSet::Reg_rcx,
- TargetX8664::Traits::RegisterSet::Reg_r8,
- TargetX8664::Traits::RegisterSet::Reg_r9,
- };
- static_assert(llvm::array_lengthof(GprForArgNum) ==
- TargetX8664::TargetX8664::Traits::X86_MAX_GPR_ARGS,
- "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
- assert(Ty == IceType_i64 || Ty == IceType_i32);
- return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
- TargetX8664::Traits::getGprForType(Ty, GprForArgNum[ArgNum]));
-}
-
-// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
-// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
-constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
-
-} // end of anonymous namespace
-
-void TargetX8664::lowerCall(const InstCall *Instr) {
- // x86-64 calling convention:
- //
- // * At the point before the call, the stack must be aligned to 16 bytes.
- //
- // * The first eight arguments of vector/fp type, regardless of their
- // position relative to the other arguments in the argument list, are placed
- // in registers %xmm0 - %xmm7.
- //
- // * The first six arguments of integer types, regardless of their position
- // relative to the other arguments in the argument list, are placed in
- // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
- //
- // * Other arguments are pushed onto the stack in right-to-left order, such
- // that the left-most argument ends up on the top of the stack at the lowest
- // memory address.
- //
- // * Stack arguments of vector type are aligned to start at the next highest
- // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes.
- //
- // This intends to match the section "Function Calling Sequence" of the
- // document "System V Application Binary Interface."
- NeedsStackAlignment = true;
-
- using OperandList =
- llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
- Traits::X86_MAX_GPR_ARGS)>;
- OperandList XmmArgs;
- CfgVector<std::pair<const Type, Operand *>> GprArgs;
- OperandList StackArgs, StackArgLocations;
- int32_t ParameterAreaSizeBytes = 0;
-
- // Classify each argument operand according to the location where the
- // argument is passed.
- for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
- Operand *Arg = Instr->getArg(i);
- Type Ty = Arg->getType();
- // The PNaCl ABI requires the width of arguments to be at least 32 bits.
- assert(typeWidthInBytes(Ty) >= 4);
- if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
- XmmArgs.push_back(Arg);
- } else if (isScalarFloatingType(Ty) &&
- XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
- XmmArgs.push_back(Arg);
- } else if (isScalarIntegerType(Ty) &&
- GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
- GprArgs.emplace_back(Ty, Arg);
- } else {
- StackArgs.push_back(Arg);
- if (isVectorType(Arg->getType())) {
- ParameterAreaSizeBytes =
- Traits::applyStackAlignment(ParameterAreaSizeBytes);
- }
- Variable *esp =
- getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
- Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
- StackArgLocations.push_back(
- Traits::X86OperandMem::create(Func, Ty, esp, Loc));
- ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
- }
- }
-
- // Adjust the parameter area so that the stack is aligned. It is assumed that
- // the stack is already aligned at the start of the calling sequence.
- ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
- assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
- maxOutArgsSizeBytes());
-
- // Copy arguments that are passed on the stack to the appropriate stack
- // locations.
- for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
- lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
- }
-
- // Copy arguments to be passed in registers to the appropriate registers.
- // TODO: Investigate the impact of lowering arguments passed in registers
- // after lowering stack arguments as opposed to the other way around.
- // Lowering register arguments after stack arguments may reduce register
- // pressure. On the other hand, lowering register arguments first (before
- // stack arguments) may result in more compact code, as the memory operand
- // displacements may end up being smaller before any stack adjustment is
- // done.
- for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
- Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
- // Generate a FakeUse of register arguments so that they do not get dead
- // code eliminated as a result of the FakeKill of scratch registers after
- // the call.
- Context.insert<InstFakeUse>(Reg);
- }
-
- for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
- const Type SignatureTy = GprArgs[i].first;
- Operand *Arg = GprArgs[i].second;
- Variable *Reg =
- legalizeToReg(Arg, getRegisterForGprArgNum(Arg->getType(), i));
- assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
- if (SignatureTy != Arg->getType()) {
- if (SignatureTy == IceType_i32) {
- assert(Arg->getType() == IceType_i64);
- Variable *T = makeReg(
- IceType_i32, Traits::getGprForType(IceType_i32, Reg->getRegNum()));
- _mov(T, Reg);
- Reg = T;
- } else {
- // This branch has never been reached, so we leave the assert(false)
- // here until we figure out how to exercise it.
- assert(false);
- assert(Arg->getType() == IceType_i32);
- Variable *T = makeReg(
- IceType_i64, Traits::getGprForType(IceType_i64, Reg->getRegNum()));
- _movzx(T, Reg);
- Reg = T;
- }
- }
- Context.insert<InstFakeUse>(Reg);
- }
-
- // Generate the call instruction. Assign its result to a temporary with high
- // register allocation weight.
- Variable *Dest = Instr->getDest();
- // ReturnReg doubles as ReturnRegLo as necessary.
- Variable *ReturnReg = nullptr;
- if (Dest) {
- switch (Dest->getType()) {
- case IceType_NUM:
- case IceType_void:
- llvm::report_fatal_error("Invalid Call dest type");
- break;
- case IceType_i1:
- case IceType_i8:
- case IceType_i16:
- // The bitcode should never return an i1, i8, or i16.
- assert(false);
- // Fallthrough intended.
- case IceType_i32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
- break;
- case IceType_i64:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_rax);
- break;
- case IceType_f32:
- case IceType_f64:
- case IceType_v4i1:
- case IceType_v8i1:
- case IceType_v16i1:
- case IceType_v16i8:
- case IceType_v8i16:
- case IceType_v4i32:
- case IceType_v4f32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
- break;
- }
- }
-
- InstX86Label *ReturnAddress = nullptr;
- Operand *CallTarget =
- legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
- auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
+Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
Inst *NewCall = nullptr;
- if (!NeedSandboxing) {
- if (CallTargetR != nullptr) {
- // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
- // emitted call needs a i64 register (for textual asm.)
- Variable *T = makeReg(IceType_i64);
- _movzx(T, CallTargetR);
- CallTarget = T;
- }
- NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
- } else {
- ReturnAddress = InstX86Label::create(Func, this);
+ auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
+ if (NeedSandboxing) {
+ InstX86Label *ReturnAddress = InstX86Label::create(Func, this);
ReturnAddress->setIsReturnLocation(true);
constexpr bool SuppressMangling = true;
/* AutoBundle scoping */ {
@@ -620,104 +422,32 @@
}
Context.insert(ReturnAddress);
- }
-
- // Insert a register-kill pseudo instruction.
- Context.insert<InstFakeKill>(NewCall);
-
- // Generate a FakeUse to keep the call live if necessary.
- if (Instr->hasSideEffects() && ReturnReg) {
- Context.insert<InstFakeUse>(ReturnReg);
- }
-
- if (!Dest)
- return;
-
- assert(ReturnReg && "x86-64 always returns value on registers.");
-
- if (isVectorType(Dest->getType())) {
- _movp(Dest, ReturnReg);
} else {
- assert(isScalarFloatingType(Dest->getType()) ||
- isScalarIntegerType(Dest->getType()));
- _mov(Dest, ReturnReg);
+ if (CallTargetR != nullptr) {
+ // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
+ // emitted call needs a i64 register (for textual asm.)
+ Variable *T = makeReg(IceType_i64);
+ _movzx(T, CallTargetR);
+ CallTarget = T;
+ }
+ NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
}
+ return NewCall;
}
-void TargetX8664::lowerArguments() {
- VarList &Args = Func->getArgs();
- // The first eight vector typed arguments (as well as fp arguments) are
- // passed in %xmm0 through %xmm7 regardless of their position in the argument
- // list.
- unsigned NumXmmArgs = 0;
- // The first six integer typed arguments are passed in %rdi, %rsi, %rdx,
- // %rcx, %r8, and %r9 regardless of their position in the argument list.
- unsigned NumGprArgs = 0;
-
- Context.init(Func->getEntryNode());
- Context.setInsertPoint(Context.getCur());
-
- for (SizeT i = 0, End = Args.size();
- i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
- NumGprArgs < Traits::X86_MAX_XMM_ARGS);
- ++i) {
- Variable *Arg = Args[i];
- Type Ty = Arg->getType();
- Variable *RegisterArg = nullptr;
- int32_t RegNum = Variable::NoRegister;
- if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
- if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
- continue;
- }
- RegNum = getRegisterForXmmArgNum(NumXmmArgs);
- ++NumXmmArgs;
- RegisterArg = Func->makeVariable(Ty);
- } else if (isScalarIntegerType(Ty)) {
- if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
- continue;
- }
- RegNum = getRegisterForGprArgNum(Ty, NumGprArgs);
- ++NumGprArgs;
- RegisterArg = Func->makeVariable(Ty);
- }
- assert(RegNum != Variable::NoRegister);
- assert(RegisterArg != nullptr);
- // Replace Arg in the argument list with the home register. Then generate
- // an instruction in the prolog to copy the home register to the assigned
- // location of Arg.
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[i] = RegisterArg;
- Context.insert<InstAssign>(Arg, RegisterArg);
+Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
+ Type ReturnType) {
+ if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) {
+ return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
+ } else {
+ assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
+ Variable *Reg = nullptr;
+ _mov(Reg, Value,
+ Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax));
+ return Reg;
}
}
-void TargetX8664::lowerRet(const InstRet *Inst) {
- Variable *Reg = nullptr;
- if (Inst->hasRetValue()) {
- Operand *Src0 = legalize(Inst->getRetValue());
- const Type Src0Ty = Src0->getType();
- if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) {
- Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
- } else {
- assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64);
- _mov(Reg, Src0,
- Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax));
- }
- }
- // Add a ret instruction even if sandboxing is enabled, because addEpilog
- // explicitly looks for a ret instruction as a marker for where to insert the
- // frame removal instructions.
- _ret(Reg);
- // Add a fake use of esp to make sure esp stays alive for the entire
- // function. Otherwise post-call esp adjustments get dead-code eliminated.
- keepEspLiveAtExit();
-}
-
void TargetX8664::addProlog(CfgNode *Node) {
// Stack frame layout:
//
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 0f8722b..e10d834 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -60,9 +60,8 @@
void initSandbox();
void lowerIndirectJump(Variable *JumpTarget);
- void lowerCall(const InstCall *Instr) override;
- void lowerArguments() override;
- void lowerRet(const InstRet *Inst) override;
+ Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
+ Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 39c5b95..bfa0231 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -68,10 +68,11 @@
using Cond = ::Ice::CondX8664;
using RegisterSet = ::Ice::RegX8664;
- static const SizeT StackPtr = RegX8664::Reg_rsp;
- static const SizeT FramePtr = RegX8664::Reg_rbp;
- static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax;
- static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
+ static constexpr SizeT StackPtr = RegX8664::Reg_rsp;
+ static constexpr SizeT FramePtr = RegX8664::Reg_rbp;
+ static constexpr GPRRegister Encoded_Reg_Accumulator =
+ RegX8664::Encoded_Reg_eax;
+ static constexpr GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
static constexpr FixupKind FK_PcRel = llvm::ELF::R_X86_64_PC32;
static constexpr FixupKind FK_Abs = llvm::ELF::R_X86_64_32;
static constexpr FixupKind FK_Gotoff = llvm::ELF::R_X86_64_GOTOFF64;
@@ -715,21 +716,61 @@
static int32_t getRdxOrDie() { return RegisterSet::Reg_rdx; }
+ // x86-64 calling convention:
+ //
+ // * The first eight arguments of vector/fp type, regardless of their
+ // position relative to the other arguments in the argument list, are placed
+ // in registers %xmm0 - %xmm7.
+ //
+ // * The first six arguments of integer types, regardless of their position
+ // relative to the other arguments in the argument list, are placed in
+ // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+ //
+ // This intends to match the section "Function Calling Sequence" of the
+ // document "System V Application Binary Interface."
+
/// The maximum number of arguments to pass in XMM registers
- static const uint32_t X86_MAX_XMM_ARGS = 8;
+ static constexpr uint32_t X86_MAX_XMM_ARGS = 8;
/// The maximum number of arguments to pass in GPR registers
- static const uint32_t X86_MAX_GPR_ARGS = 6;
+ static constexpr uint32_t X86_MAX_GPR_ARGS = 6;
+ /// Whether scalar floating point arguments are passed in XMM registers
+ static constexpr bool X86_PASS_SCALAR_FP_IN_XMM = true;
+ /// Get the register for a given argument slot in the XMM registers.
+ static int32_t getRegisterForXmmArgNum(uint32_t ArgNum) {
+ // TODO(sehr): Change to use the CCArg technique used in ARM32.
+ static_assert(RegisterSet::Reg_xmm0 + 1 == RegisterSet::Reg_xmm1,
+ "Inconsistency between XMM register numbers and ordinals");
+ if (ArgNum >= X86_MAX_XMM_ARGS) {
+ return Variable::NoRegister;
+ }
+ return static_cast<int32_t>(RegisterSet::Reg_xmm0 + ArgNum);
+ }
+ /// Get the register for a given argument slot in the GPRs.
+ static int32_t getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) {
+ if (ArgNum >= X86_MAX_GPR_ARGS) {
+ return Variable::NoRegister;
+ }
+ static const RegisterSet::AllRegisters GprForArgNum[] = {
+ RegisterSet::Reg_rdi, RegisterSet::Reg_rsi, RegisterSet::Reg_rdx,
+ RegisterSet::Reg_rcx, RegisterSet::Reg_r8, RegisterSet::Reg_r9,
+ };
+ static_assert(llvm::array_lengthof(GprForArgNum) == X86_MAX_GPR_ARGS,
+ "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
+ assert(Ty == IceType_i64 || Ty == IceType_i32);
+ return static_cast<int32_t>(getGprForType(Ty, GprForArgNum[ArgNum]));
+ }
+
/// The number of bits in a byte
- static const uint32_t X86_CHAR_BIT = 8;
+ static constexpr uint32_t X86_CHAR_BIT = 8;
/// Stack alignment. This is defined in IceTargetLoweringX8664.cpp because it
/// is used as an argument to std::max(), and the default std::less<T> has an
/// operator(T const&, T const&) which requires this member to have an
/// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack
- static const uint32_t X86_RET_IP_SIZE_BYTES = 8;
+ static constexpr uint32_t X86_RET_IP_SIZE_BYTES = 8;
/// The number of different NOP instructions
- static const uint32_t X86_NUM_NOP_VARIANTS = 5;
+ static constexpr uint32_t X86_NUM_NOP_VARIANTS = 5;
/// \name Limits for unrolling memory intrinsics.
/// @{
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index cc3bd73..72e4f20 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -197,9 +197,11 @@
}
void lowerAlloca(const InstAlloca *Inst) override;
+ void lowerArguments() override;
void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override;
+ void lowerCall(const InstCall *Inst) override;
void lowerCast(const InstCast *Inst) override;
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
@@ -209,6 +211,7 @@
void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override;
void lowerPhi(const InstPhi *Inst) override;
+ void lowerRet(const InstRet *Inst) override;
void lowerSelect(const InstSelect *Inst) override;
void lowerStore(const InstStore *Inst) override;
void lowerSwitch(const InstSwitch *Inst) override;
@@ -279,6 +282,13 @@
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1);
+ /// Emit just the call instruction (without argument or return variable
+ /// processing), sandboxing if needed.
+ virtual Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) = 0;
+ /// Materialize the moves needed to return a value of the specified type.
+ virtual Variable *moveReturnValueToRegister(Operand *Value,
+ Type ReturnType) = 0;
+
/// Emit a fake use of esp to make sure esp stays alive for the entire
/// function. Otherwise some esp adjustments get dead-code eliminated.
void keepEspLiveAtExit() {
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 20fd64f..28f3f2f 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1096,6 +1096,67 @@
}
}
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerArguments() {
+ VarList &Args = Func->getArgs();
+ unsigned NumXmmArgs = 0;
+ bool XmmSlotsRemain = true;
+ unsigned NumGprArgs = 0;
+ bool GprSlotsRemain = true;
+
+ Context.init(Func->getEntryNode());
+ Context.setInsertPoint(Context.getCur());
+
+ for (SizeT i = 0, End = Args.size();
+ i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
+ Variable *Arg = Args[i];
+ Type Ty = Arg->getType();
+ Variable *RegisterArg = nullptr;
+ int32_t RegNum = Variable::NoRegister;
+ if (isVectorType(Ty)) {
+ RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
+ if (RegNum == Variable::NoRegister) {
+ XmmSlotsRemain = false;
+ continue;
+ }
+ ++NumXmmArgs;
+ RegisterArg = Func->makeVariable(Ty);
+ } else if (isScalarFloatingType(Ty)) {
+ if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+ continue;
+ }
+ RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
+ if (RegNum == Variable::NoRegister) {
+ XmmSlotsRemain = false;
+ continue;
+ }
+ ++NumXmmArgs;
+ RegisterArg = Func->makeVariable(Ty);
+ } else if (isScalarIntegerType(Ty)) {
+ RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
+ if (RegNum == Variable::NoRegister) {
+ GprSlotsRemain = false;
+ continue;
+ }
+ ++NumGprArgs;
+ RegisterArg = Func->makeVariable(Ty);
+ }
+ assert(RegNum != Variable::NoRegister);
+ assert(RegisterArg != nullptr);
+ // Replace Arg in the argument list with the home register. Then generate
+ // an instruction in the prolog to copy the home register to the assigned
+ // location of Arg.
+ if (BuildDefs::dump())
+ RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+ RegisterArg->setRegNum(RegNum);
+ RegisterArg->setIsArg();
+ Arg->setIsArg(false);
+
+ Args[i] = RegisterArg;
+ Context.insert<InstAssign>(Arg, RegisterArg);
+ }
+}
+
/// Strength-reduce scalar integer multiplication by a constant (for i32 or
/// narrower) for certain constants. The lea instruction can be used to multiply
/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
@@ -2028,6 +2089,204 @@
_br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
}
+// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
+// OperandList in lowerCall. std::max() is not constexpr before C++14.
+inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
+ return S0 < S1 ? S1 : S0;
+}
+
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
+ // Common x86 calling convention lowering:
+ //
+ // * At the point before the call, the stack must be aligned to 16 bytes.
+ //
+ // * Non-register arguments are pushed onto the stack in right-to-left order,
+ // such that the left-most argument ends up on the top of the stack at the
+ // lowest memory address.
+ //
+ // * Stack arguments of vector type are aligned to start at the next highest
+ // multiple of 16 bytes. Other stack arguments are aligned to the target's
+ // word size (4 bytes on x86-32, 8 bytes on x86-64).
+ NeedsStackAlignment = true;
+
+ using OperandList =
+ llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
+ Traits::X86_MAX_GPR_ARGS)>;
+ OperandList XmmArgs;
+ CfgVector<std::pair<const Type, Operand *>> GprArgs;
+ OperandList StackArgs, StackArgLocations;
+ uint32_t ParameterAreaSizeBytes = 0;
+
+ // Classify each argument operand according to the location where the argument
+ // is passed.
+ for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+ Operand *Arg = Instr->getArg(i);
+ const Type Ty = Arg->getType();
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+ assert(typeWidthInBytes(Ty) >= 4);
+ if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) !=
+ Variable::NoRegister)) {
+ XmmArgs.push_back(Arg);
+ } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
+ (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) {
+ XmmArgs.push_back(Arg);
+ } else if (isScalarIntegerType(Ty) &&
+ (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) !=
+ Variable::NoRegister)) {
+ GprArgs.emplace_back(Ty, Arg);
+ } else {
+ // Place on stack.
+ StackArgs.push_back(Arg);
+ if (isVectorType(Arg->getType())) {
+ ParameterAreaSizeBytes =
+ Traits::applyStackAlignment(ParameterAreaSizeBytes);
+ }
+ Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
+ Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+ StackArgLocations.push_back(
+ Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+ }
+ }
+ // Ensure there is enough space for the fstp/movs for floating returns.
+ Variable *Dest = Instr->getDest();
+ const Type DestTy = Dest ? Dest->getType() : IceType_void;
+ if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+ if (isScalarFloatingType(DestTy)) {
+ ParameterAreaSizeBytes =
+ std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+ typeWidthInBytesOnStack(DestTy));
+ }
+ }
+ // Adjust the parameter area so that the stack is aligned. It is assumed that
+ // the stack is already aligned at the start of the calling sequence.
+ ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+ assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
+ // Copy arguments that are passed on the stack to the appropriate stack
+ // locations.
+ for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ }
+ // Copy arguments to be passed in registers to the appropriate registers.
+ for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+ Variable *Reg =
+ legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
+ // Generate a FakeUse of register arguments so that they do not get dead
+ // code eliminated as a result of the FakeKill of scratch registers after
+ // the call.
+ Context.insert<InstFakeUse>(Reg);
+ }
+ // Materialize moves for arguments passed in GPRs.
+ for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
+ const Type SignatureTy = GprArgs[i].first;
+ Operand *Arg = GprArgs[i].second;
+ Variable *Reg =
+ legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
+ assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
+ assert(SignatureTy == Arg->getType());
+ (void)SignatureTy;
+ Context.insert<InstFakeUse>(Reg);
+ }
+ // Generate the call instruction. Assign its result to a temporary with high
+ // register allocation weight.
+ // ReturnReg doubles as ReturnRegLo as necessary.
+ Variable *ReturnReg = nullptr;
+ Variable *ReturnRegHi = nullptr;
+ if (Dest) {
+ switch (DestTy) {
+ case IceType_NUM:
+ case IceType_void:
+ case IceType_i1:
+ case IceType_i8:
+ case IceType_i16:
+ llvm::report_fatal_error("Invalid Call dest type");
+ break;
+ case IceType_i32:
+ ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
+ break;
+ case IceType_i64:
+ if (Traits::Is64Bit) {
+ ReturnReg = makeReg(
+ IceType_i64,
+ Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax));
+ } else {
+ ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ }
+ break;
+ case IceType_f32:
+ case IceType_f64:
+ if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+ // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+ // the fstp instruction.
+ break;
+ }
+ // Fallthrough intended.
+ case IceType_v4i1:
+ case IceType_v8i1:
+ case IceType_v16i1:
+ case IceType_v16i8:
+ case IceType_v8i16:
+ case IceType_v4i32:
+ case IceType_v4f32:
+ ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
+ break;
+ }
+ }
+ // Emit the call to the function.
+ Operand *CallTarget =
+ legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
+ Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
+ // Keep the upper return register live on 32-bit platforms.
+ if (ReturnRegHi)
+ Context.insert<InstFakeDef>(ReturnRegHi);
+ // Mark the call as killing all the caller-save registers.
+ Context.insert<InstFakeKill>(NewCall);
+ // Handle x86-32 floating point returns.
+ if (Dest != nullptr && isScalarFloatingType(Dest->getType()) &&
+ !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+ // Special treatment for an FP function which returns its result in st(0).
+ // If Dest ends up being a physical xmm register, the fstp emit code will
+ // route st(0) through the space reserved in the function argument area
+ // we allocated.
+ _fstp(Dest);
+ // Create a fake use of Dest in case it actually isn't used, because st(0)
+ // still needs to be popped.
+ Context.insert<InstFakeUse>(Dest);
+ }
+ // Generate a FakeUse to keep the call live if necessary.
+ if (Instr->hasSideEffects() && ReturnReg) {
+ Context.insert<InstFakeUse>(ReturnReg);
+ }
+ // Process the return value, if any.
+ if (Dest == nullptr)
+ return;
+ // Assign the result of the call to Dest.
+ if (isVectorType(DestTy)) {
+ assert(ReturnReg && "Vector type requires a return register");
+ _movp(Dest, ReturnReg);
+ } else if (isScalarFloatingType(DestTy)) {
+ if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+ assert(ReturnReg && "FP type requires a return register");
+ _mov(Dest, ReturnReg);
+ }
+ } else {
+ assert(isScalarIntegerType(DestTy));
+ assert(ReturnReg && "Integer type requires a return register");
+ if (DestTy == IceType_i64 && !Traits::Is64Bit) {
+ assert(ReturnRegHi && "64-bit type requires two return registers");
+ auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
+ Variable *DestLo = Dest64On32->getLo();
+ Variable *DestHi = Dest64On32->getHi();
+ _mov(DestLo, ReturnReg);
+ _mov(DestHi, ReturnRegHi);
+ } else {
+ _mov(Dest, ReturnReg);
+ }
+ }
+}
+
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) {
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
@@ -4821,6 +5080,25 @@
}
template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) {
+ Variable *Reg = nullptr;
+ if (Inst->hasRetValue()) {
+ Operand *RetValue = legalize(Inst->getRetValue());
+ const Type ReturnType = RetValue->getType();
+ assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
+ (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
+ Reg = moveReturnValueToRegister(RetValue, ReturnType);
+ }
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog
+ // explicitly looks for a ret instruction as a marker for where to insert the
+ // frame removal instructions.
+ _ret(Reg);
+ // Add a fake use of esp to make sure esp stays alive for the entire
+ // function. Otherwise post-call esp adjustments get dead-code eliminated.
+ keepEspLiveAtExit();
+}
+
+template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
Variable *Dest = Select->getDest();