//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the skeleton of the TargetLowering class, specifically
/// invoking the appropriate lowering method for a given instruction kind and
/// driving global register allocation. It also implements iteration over the
/// non-deleted instructions in LoweringContext.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLowering.h"
#include "IceAssemblerARM32.h"
#include "IceAssemblerMIPS32.h"
#include "IceAssemblerX8632.h"
#include "IceAssemblerX8664.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalInits.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include "IceTargetLoweringARM32.h"
#include "IceTargetLoweringMIPS32.h"
#include "IceTargetLoweringX8632.h"
#include "IceTargetLoweringX8664.h"
namespace Ice {
void LoweringContext::init(CfgNode *N) {
Node = N;
End = getNode()->getInsts().end();
rewind();
advanceForward(Next);
}
void LoweringContext::rewind() {
Begin = getNode()->getInsts().begin();
Cur = Begin;
skipDeleted(Cur);
Next = Cur;
}
void LoweringContext::insert(Inst *Instr) {
getNode()->getInsts().insert(Next, Instr);
LastInserted = Instr;
}
void LoweringContext::skipDeleted(InstList::iterator &I) const {
while (I != End && I->isDeleted())
++I;
}
void LoweringContext::advanceForward(InstList::iterator &I) const {
if (I != End) {
++I;
skipDeleted(I);
}
}
Inst *LoweringContext::getLastInserted() const {
assert(LastInserted);
return LastInserted;
}
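// As a rough usage sketch (a hypothetical driver, not necessarily the exact
// one used by the Cfg code), the context is initialized once per node and each
// call to the target's lower() method then advances it past whatever
// instructions were consumed:
//
//   Context.init(Node);
//   while (!Context.atEnd())
//     Target->lower();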
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
return Target##X::create(Func);
#include "llvm/Config/SZTargets.def"
Func->setError("Unsupported target");
return nullptr;
}
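// For illustration, each SUBZERO_TARGET(X) entry in SZTargets.def expands the
// macro above into a guarded factory call; e.g. an X8632 entry becomes:
//
//   if (Target == Target_X8632)
//     return TargetX8632::create(Func);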
TargetLowering::TargetLowering(Cfg *Func)
: Func(Func), Ctx(Func->getContext()), Context() {}
std::unique_ptr<Assembler> TargetLowering::createAssembler(TargetArch Target,
Cfg *Func) {
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
return std::unique_ptr<Assembler>(new X::Assembler##X());
#include "llvm/Config/SZTargets.def"
Func->setError("Unsupported target assembler");
return nullptr;
}
void TargetLowering::doAddressOpt() {
if (llvm::isa<InstLoad>(*Context.getCur()))
doAddressOptLoad();
else if (llvm::isa<InstStore>(*Context.getCur()))
doAddressOptStore();
Context.advanceCur();
Context.advanceNext();
}
void TargetLowering::doNopInsertion() {
Inst *I = Context.getCur();
bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
I->isDeleted();
if (!ShouldSkip) {
int Probability = Ctx->getFlags().getNopProbabilityAsPercentage();
for (int N = 0; N < Ctx->getFlags().getMaxNopsPerInstruction(); ++N) {
randomlyInsertNop(Probability / 100.0);
}
}
}
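// For example, with getNopProbabilityAsPercentage() == 50 and
// getMaxNopsPerInstruction() == 2, each eligible instruction gets two
// independent 50% chances of a nop (assuming randomlyInsertNop() inserts at
// most one nop per call), i.e. an expected 1.0 nops per instruction.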
// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Context.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired,
// and lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
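//
// For instance (a sketch of how a target might apply this, not a requirement),
// an icmp whose only use is the immediately following br can be lowered into a
// single compare-and-branch sequence; the icmp lowering would then delete the
// br and advance Context.Cur past it, so lower() never visits the br on its
// own.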
void TargetLowering::lower() {
assert(!Context.atEnd());
Inst *Inst = Context.getCur();
Inst->deleteIfDead();
if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) &&
!llvm::isa<InstFakeUse>(Inst)) {
// Mark the current instruction as deleted before lowering, otherwise the
// Dest variable will likely get marked as non-SSA. See
// Variable::setDefinition(). However, just pass through FakeDef and FakeUse
// instructions that might have been inserted prior to lowering.
Inst->setDeleted();
switch (Inst->getKind()) {
case Inst::Alloca:
lowerAlloca(llvm::cast<InstAlloca>(Inst));
break;
case Inst::Arithmetic:
lowerArithmetic(llvm::cast<InstArithmetic>(Inst));
break;
case Inst::Assign:
lowerAssign(llvm::cast<InstAssign>(Inst));
break;
case Inst::Br:
lowerBr(llvm::cast<InstBr>(Inst));
break;
case Inst::Call:
lowerCall(llvm::cast<InstCall>(Inst));
break;
case Inst::Cast:
lowerCast(llvm::cast<InstCast>(Inst));
break;
case Inst::ExtractElement:
lowerExtractElement(llvm::cast<InstExtractElement>(Inst));
break;
case Inst::Fcmp:
lowerFcmp(llvm::cast<InstFcmp>(Inst));
break;
case Inst::Icmp:
lowerIcmp(llvm::cast<InstIcmp>(Inst));
break;
case Inst::InsertElement:
lowerInsertElement(llvm::cast<InstInsertElement>(Inst));
break;
case Inst::IntrinsicCall: {
InstIntrinsicCall *Call = llvm::cast<InstIntrinsicCall>(Inst);
if (Call->getIntrinsicInfo().ReturnsTwice)
setCallsReturnsTwice(true);
lowerIntrinsicCall(Call);
break;
}
case Inst::Load:
lowerLoad(llvm::cast<InstLoad>(Inst));
break;
case Inst::Phi:
lowerPhi(llvm::cast<InstPhi>(Inst));
break;
case Inst::Ret:
lowerRet(llvm::cast<InstRet>(Inst));
break;
case Inst::Select:
lowerSelect(llvm::cast<InstSelect>(Inst));
break;
case Inst::Store:
lowerStore(llvm::cast<InstStore>(Inst));
break;
case Inst::Switch:
lowerSwitch(llvm::cast<InstSwitch>(Inst));
break;
case Inst::Unreachable:
lowerUnreachable(llvm::cast<InstUnreachable>(Inst));
break;
default:
lowerOther(Inst);
break;
}
postLower();
}
Context.advanceCur();
Context.advanceNext();
}
void TargetLowering::lowerOther(const Inst *Instr) {
(void)Instr;
Func->setError("Can't lower unsupported instruction type");
}
// Drives register allocation, allowing all physical registers (except perhaps
// the frame pointer) to be allocated. This set of registers could potentially
// be parameterized if we want to restrict registers, e.g., for performance
// testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None;
RegInclude |= RegSet_CallerSave;
RegInclude |= RegSet_CalleeSave;
if (hasFramePointer())
RegExclude |= RegSet_FramePointer;
LinearScan.init(Kind);
llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
LinearScan.scan(RegMask, Ctx->getFlags().shouldRandomizeRegAlloc());
}
void TargetLowering::inferTwoAddress() {
// Find two-address non-SSA instructions where Dest==Src0, and set
// the DestNonKillable flag to keep liveness analysis consistent.
for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
if (Inst->isDeleted())
continue;
if (Variable *Dest = Inst->getDest()) {
// TODO(stichnot): We may need to consider all source
// operands, not just the first one, if using 3-address
// instructions.
if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
Inst->setDestNonKillable();
}
}
}
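// For example, after target lowering an add may take the two-address form
// "t1 = add t1, t2", where the Dest is also Src0; inferTwoAddress() flags such
// instructions so the liveness computation stays consistent with the fact that
// the instruction reads the very value it overwrites (a sketch; the exact
// instruction shapes are target-dependent).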
void TargetLowering::sortVarsByAlignment(VarList &Dest,
const VarList &Source) const {
Dest = Source;
// Instead of std::sort, we could do a bucket sort with log2(alignment)
// as the buckets, if performance is an issue.
std::sort(Dest.begin(), Dest.end(),
[this](const Variable *V1, const Variable *V2) {
return typeWidthInBytesOnStack(V1->getType()) >
typeWidthInBytesOnStack(V2->getType());
});
}
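// For example, variables with on-stack widths of {4, 16, 8, 4} bytes would be
// reordered by sortVarsByAlignment() as {16, 8, 4, 4}, so the most strictly
// aligned variables come first in the spill area.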
void TargetLowering::getVarStackSlotParams(
VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed,
size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
std::function<bool(Variable *)> TargetVarHook) {
const VariablesMetadata *VMetadata = Func->getVMetadata();
llvm::BitVector IsVarReferenced(Func->getNumVariables());
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Inst : Node->getInsts()) {
if (Inst.isDeleted())
continue;
if (const Variable *Var = Inst.getDest())
IsVarReferenced[Var->getIndex()] = true;
for (SizeT I = 0; I < Inst.getSrcSize(); ++I) {
Operand *Src = Inst.getSrc(I);
SizeT NumVars = Src->getNumVars();
for (SizeT J = 0; J < NumVars; ++J) {
const Variable *Var = Src->getVar(J);
IsVarReferenced[Var->getIndex()] = true;
}
}
}
}
// If SimpleCoalescing is false, each variable without a register
// gets its own unique stack slot, which leads to large stack
// frames. If SimpleCoalescing is true, then each "global" variable
// without a register gets its own slot, but "local" variable slots
// are reused across basic blocks. E.g., if A and B are local to
// block 1 and C is local to block 2, then C may share a slot with A or B.
//
// We cannot coalesce stack slots if this function calls a "returns twice"
// function. In that case, basic blocks may be revisited, and variables
// local to those basic blocks are actually live until after the
// called function returns a second time.
const bool SimpleCoalescing = !callsReturnsTwice();
std::vector<size_t> LocalsSize(Func->getNumNodes());
const VarList &Variables = Func->getVariables();
VarList SpilledVariables;
for (Variable *Var : Variables) {
if (Var->hasReg()) {
RegsUsed[Var->getRegNum()] = true;
continue;
}
// An argument either does not need a stack slot (if passed in a
// register) or already has one (if passed on the stack).
if (Var->getIsArg())
continue;
// An unreferenced variable doesn't need a stack slot.
if (!IsVarReferenced[Var->getIndex()])
continue;
// Let the target check for a target-specific variable; such a variable may
// end up sharing stack slots and not need accounting here.
if (TargetVarHook(Var))
continue;
SpilledVariables.push_back(Var);
}
SortedSpilledVariables.reserve(SpilledVariables.size());
sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);
for (Variable *Var : SortedSpilledVariables) {
size_t Increment = typeWidthInBytesOnStack(Var->getType());
// We have sorted by alignment, so the first variable we encounter that
// is located in each area determines the max alignment for the area.
if (!*SpillAreaAlignmentBytes)
*SpillAreaAlignmentBytes = Increment;
if (SimpleCoalescing && VMetadata->isTracked(Var)) {
if (VMetadata->isMultiBlock(Var)) {
*GlobalsSize += Increment;
} else {
SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
LocalsSize[NodeIndex] += Increment;
if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
*SpillAreaSizeBytes = LocalsSize[NodeIndex];
if (!*LocalsSlotsAlignmentBytes)
*LocalsSlotsAlignmentBytes = Increment;
}
} else {
*SpillAreaSizeBytes += Increment;
}
}
}
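// As a worked example of getVarStackSlotParams() with hypothetical variables
// (assuming SimpleCoalescing applies and the variables are tracked): a
// multi-block variable G of 8 bytes contributes GlobalsSize = 8, while 4-byte
// single-block variables A and B in node 1 and C in node 2 give
// LocalsSize[1] = 8 and LocalsSize[2] = 4, so SpillAreaSizeBytes (the largest
// per-node locals area) ends up as 8 rather than 12, because C can later reuse
// a slot from node 1.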
void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
uint32_t SpillAreaAlignmentBytes,
size_t GlobalsSize,
uint32_t LocalsSlotsAlignmentBytes,
uint32_t *SpillAreaPaddingBytes,
uint32_t *LocalsSlotsPaddingBytes) {
if (SpillAreaAlignmentBytes) {
uint32_t PaddingStart = SpillAreaStartOffset;
uint32_t SpillAreaStart =
Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
*SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
}
// If there are separate globals and locals areas, make sure the
// locals area is aligned by padding the end of the globals area.
if (LocalsSlotsAlignmentBytes) {
uint32_t GlobalsAndSubsequentPaddingSize =
Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
*LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
}
}
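// For example, with SpillAreaStartOffset = 4 and SpillAreaAlignmentBytes = 16,
// the spill area is pushed up to offset 16, giving SpillAreaPaddingBytes = 12;
// and with GlobalsSize = 12 and LocalsSlotsAlignmentBytes = 8, the globals
// area is padded up to 16 bytes, giving LocalsSlotsPaddingBytes = 4.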
void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
size_t SpillAreaPaddingBytes,
size_t SpillAreaSizeBytes,
size_t GlobalsAndSubsequentPaddingSize,
bool UsesFramePointer) {
const VariablesMetadata *VMetadata = Func->getVMetadata();
size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
size_t NextStackOffset = SpillAreaPaddingBytes;
std::vector<size_t> LocalsSize(Func->getNumNodes());
const bool SimpleCoalescing = !callsReturnsTwice();
for (Variable *Var : SortedSpilledVariables) {
size_t Increment = typeWidthInBytesOnStack(Var->getType());
if (SimpleCoalescing && VMetadata->isTracked(Var)) {
if (VMetadata->isMultiBlock(Var)) {
GlobalsSpaceUsed += Increment;
NextStackOffset = GlobalsSpaceUsed;
} else {
SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
LocalsSize[NodeIndex] += Increment;
NextStackOffset = SpillAreaPaddingBytes +
GlobalsAndSubsequentPaddingSize +
LocalsSize[NodeIndex];
}
} else {
NextStackOffset += Increment;
}
if (UsesFramePointer)
Var->setStackOffset(-NextStackOffset);
else
Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
}
}
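// Continuing the hypothetical example above through assignVarStackSlots(),
// with SpillAreaPaddingBytes = 0, GlobalsAndSubsequentPaddingSize = 8, and
// simple coalescing in effect: the 8-byte global G gets NextStackOffset = 8
// and a 4-byte block-local A gets NextStackOffset = 8 + 4 = 12, so with a
// frame pointer their offsets are -8 and -12, and without one they are
// SpillAreaSizeBytes - 8 and SpillAreaSizeBytes - 12 from the stack pointer.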
InstCall *TargetLowering::makeHelperCall(const IceString &Name, Variable *Dest,
SizeT MaxSrcs) {
const bool HasTailCall = false;
Constant *CallTarget = Ctx->getConstantExternSym(Name);
InstCall *Call =
InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
return Call;
}
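// A typical use of makeHelperCall() (a sketch, not tied to a particular
// target) is lowering an operation with no direct machine encoding, such as a
// 64-bit divide on a 32-bit target, into a call to a named runtime helper:
// create the call here, add the original operands as call arguments, and then
// lower the resulting call instruction.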
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
if (C->getSuppressMangling())
Str << C->getName();
else
Str << Ctx->mangleName(C->getName());
RelocOffsetT Offset = C->getOffset();
if (Offset) {
if (Offset > 0)
Str << "+";
Str << Offset;
}
}
void TargetLowering::emit(const ConstantRelocatable *C) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
Str << getConstantPrefix();
emitWithoutPrefix(C);
}
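// For example, on a target whose getConstantPrefix() is "$" (as on x86), a
// relocatable symbol foo with offset 8 and mangling suppressed would be
// emitted by emit() as "$foo+8".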
std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
TargetArch Target = Ctx->getFlags().getTargetArch();
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
return TargetData##X::create(Ctx);
#include "llvm/Config/SZTargets.def"
llvm::report_fatal_error("Unsupported target data lowering");
}
TargetDataLowering::~TargetDataLowering() = default;
namespace {
// dataSectionSuffix decides whether to use SectionSuffix or MangledVarName as
// the data section suffix. Essentially, when separate data sections are used
// for globals, SectionSuffix is not necessary.
IceString dataSectionSuffix(const IceString &SectionSuffix,
const IceString &MangledVarName,
const bool DataSections) {
if (SectionSuffix.empty() && !DataSections) {
return "";
}
if (DataSections) {
// With data sections we don't need to use the SectionSuffix.
return "." + MangledVarName;
}
assert(!SectionSuffix.empty());
return "." + SectionSuffix;
}
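// For example, with hypothetical arguments: dataSectionSuffix("myseg", "foo",
// /*DataSections=*/false) yields ".myseg", dataSectionSuffix("myseg", "foo",
// /*DataSections=*/true) yields ".foo", and dataSectionSuffix("", "foo",
// /*DataSections=*/false) yields "".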
} // end of anonymous namespace
void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
const IceString &SectionSuffix) {
if (!BuildDefs::dump())
return;
// If external and not initialized, this must be a cross test.
// Don't generate a declaration for such cases.
const bool IsExternal =
Var.isExternal() || Ctx->getFlags().getDisableInternal();
if (IsExternal && !Var.hasInitializer())
return;
Ostream &Str = Ctx->getStrEmit();
const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
const bool IsConstant = Var.getIsConstant();
const SizeT Size = Var.getNumBytes();
const IceString MangledName = Var.mangleName(Ctx);
Str << "\t.type\t" << MangledName << ",%object\n";
const bool UseDataSections = Ctx->getFlags().getDataSections();
const IceString Suffix =
dataSectionSuffix(SectionSuffix, MangledName, UseDataSections);
if (IsConstant)
Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
else if (HasNonzeroInitializer)
Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
else
Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";
if (IsExternal)
Str << "\t.globl\t" << MangledName << "\n";
const uint32_t Align = Var.getAlignment();
if (Align > 1) {
assert(llvm::isPowerOf2_32(Align));
// Use the .p2align directive, since the .align N directive can either
// interpret N as bytes, or power of 2 bytes, depending on the target.
Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
}
Str << MangledName << ":\n";
if (HasNonzeroInitializer) {
for (const std::unique_ptr<VariableDeclaration::Initializer> &Init :
Var.getInitializers()) {
switch (Init->getKind()) {
case VariableDeclaration::Initializer::DataInitializerKind: {
const auto &Data =
llvm::cast<VariableDeclaration::DataInitializer>(Init.get())
->getContents();
for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
}
break;
}
case VariableDeclaration::Initializer::ZeroInitializerKind:
Str << "\t.zero\t" << Init->getNumBytes() << "\n";
break;
case VariableDeclaration::Initializer::RelocInitializerKind: {
const auto *Reloc =
llvm::cast<VariableDeclaration::RelocInitializer>(Init.get());
Str << "\t" << getEmit32Directive() << "\t";
Str << Reloc->getDeclaration()->mangleName(Ctx);
if (RelocOffsetT Offset = Reloc->getOffset()) {
// Print INT32_MIN with a '+' rather than negating it, which would overflow.
if (Offset >= 0 || (Offset == INT32_MIN))
Str << " + " << Offset;
else
Str << " - " << -Offset;
}
Str << "\n";
break;
}
}
}
} else {
// NOTE: For non-constant zero initializers this is BSS (no bits), so an ELF
// writer would not write to the file and would only track virtual offsets.
// The .s writer, however, still needs this .zero directive and cannot simply
// use the .size to advance offsets.
Str << "\t.zero\t" << Size << "\n";
}
Str << "\t.size\t" << MangledName << ", " << Size << "\n";
}
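// For illustration, emitGlobal() on an external, non-constant 4-byte variable
// "foo" (hypothetical, with mangling assumed to leave the name unchanged) that
// has a nonzero data initializer and 4-byte alignment, with data sections
// enabled, would emit roughly:
//
//   .type foo,%object
//   .section .data.foo,"aw",%progbits
//   .globl foo
//   .p2align 2
//   foo:
//   .byte 1
//   .byte 0
//   .byte 0
//   .byte 0
//   .size foo, 4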
std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
TargetArch Target = Ctx->getFlags().getTargetArch();
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
return TargetHeader##X::create(Ctx);
#include "llvm/Config/SZTargets.def"
llvm::report_fatal_error("Unsupported target header lowering");
}
TargetHeaderLowering::~TargetHeaderLowering() = default;
} // end of namespace Ice