Check in LLVM r95781.
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 0000000..8840622
--- /dev/null
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,937 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
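+// For example (hypothetical invocation; the modulus check below only
+// fires in !NDEBUG builds):
+//   llc -agg-antidep-debugdiv=16 -agg-antidep-debugmod=3 ...
+// attempts only those renames whose sequence number is congruent to
+// 3 mod 16, which helps bisect a miscompile down to a single rename.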
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
+
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
+{
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
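+// For illustration: with GroupNodes = {0, 1, 1} and
+// GroupNodeIndices[Reg] = 2, GetGroup(Reg) follows 2 -> 1, finds that
+// GroupNodes[1] == 1, and returns 1 as the group's parent node.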
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
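+// Note that after UnionGroups(Reg1, Reg2), GetGroup returns the same
+// parent node for every register that was in either group, and a
+// group containing register 0 always becomes the parent, keeping
+// group 0 canonical.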
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ State(NULL) {
+ // Collect a bitset of all registers that are only broken if they
+ // are on the critical path.
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ } else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ unsigned *DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
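+ // A register is "passthru" if its live range passes through MI
+ // unchanged: e.g. the tied def in a two-address instruction such as
+ // "R1 = add R1, R2", or a register that is both implicitly defined
+ // and implicitly used by MI.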
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ PassthruRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output-dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) == 0) {
+ Edges.push_back(&*P);
+ RegSet.insert(Reg);
+ }
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SUnit *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs)
+{
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ DefIndices[Reg] = Count;
+ for (const unsigned *Alias = TRI->getAliasSet(Reg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ DefIndices[AliasReg] = Count;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // If the register wasn't previously live, this use kills it. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ // If MI's uses have a special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(dbgs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0, then only rename when (renamecnt % DebugDiv) == DebugMod.
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+ const TargetRegisterClass *SuperRC =
+ TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
+
+ const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
+ const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
+ if (RB == RE) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ if (RenameOrder.count(SuperRC) == 0)
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
+
+ const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
+ const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
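+ // Walk the allocation order backwards from OrigR, wrapping from the
+ // front of the order to the back, so that successive calls resume
+ // the search where the previous rename left off (round-robin).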
+ TargetRegisterClass::iterator R = OrigR;
+ do {
+ if (R == RB) R = RE;
+ --R;
+ const unsigned NewSuperReg = *R;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // We need a map from MI to SUnit.
+ std::map<MachineInstr *, SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<SDep*> Edges;
+ SUnit *PathSU = MISUnitMap[MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI->isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!AllocatableSet.test(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 0000000..a62d68c
--- /dev/null
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,183 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a live range.
+ typedef struct {
+ /// Operand - The register operand.
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
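+ /// For illustration: GroupNodes = {0, 1, 1} encodes two groups, one
+ /// parented at node 0 and one at node 1 (node 2 points to node 1,
+ /// so it is a member of node 1's group).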
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-dependency
+ /// breaking.
+ unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding
+ /// bottom-up), or ~0u if the register is not live.
+ /// or ~0u if the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
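+ /// The intended invariant (see the assert in BreakAntiDependencies)
+ /// is that exactly one of KillIndices[Reg] and DefIndices[Reg] is
+ /// ~0u at any time: e.g. a register killed at index 10 and not yet
+ /// redefined has KillIndices[Reg] == 10 and DefIndices[Reg] == ~0u.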
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ unsigned *GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the define indices.
+ unsigned *GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+ // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetRegisterInfo *TRI;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies within the
+ /// ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ typedef std::map<const TargetRegisterClass *,
+ TargetRegisterClass::const_iterator> RenameOrderType;
+
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header =NULL, const char *footer =NULL);
+
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 0000000..3ee30c6
--- /dev/null
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,59 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ virtual ~AntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+ /// BreakAntiDependencies - Identify anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) =0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) =0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() =0;
+};
+
+}
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..fc08384
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1790 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cerrno>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
+ MCContext &Ctx, MCStreamer &Streamer,
+ const MCAsmInfo *T)
+ : MachineFunctionPass(&ID), O(o),
+ TM(tm), MAI(T), TRI(tm.getRegisterInfo()),
+ OutContext(Ctx), OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), PrevDLT(NULL) {
+ DW = 0; MMI = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ for (gcp_iterator I = GCMetadataPrinters.begin(),
+ E = GCMetadataPrinters.end(); I != E; ++I)
+ delete I->second;
+
+ delete &OutStreamer;
+ delete &OutContext;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ if (VerboseAsm)
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(*MAI);
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(O, *this, *MAI);
+
+ if (!M.getModuleInlineAsm().empty())
+ O << MAI->getCommentString() << " Start of file scope inline assembly\n"
+ << M.getModuleInlineAsm()
+ << '\n' << MAI->getCommentString()
+ << " End of file scope inline assembly\n";
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI)
+ MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (DW)
+ DW->BeginModule(&M, MMI, O, this, MAI);
+
+ return false;
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // FIXME: linkonce should be a section attribute, handled by COFF Section
+ // assignment.
+ // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce
+ // .linkonce discard
+ // FIXME: It would be nice to use .linkonce samesize for non-common
+ // globals.
+ O << LinkOnce;
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (!GV->hasInitializer()) // External globals require no code.
+ return;
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ MCSymbol *GVSym = GetGlobalValueSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ unsigned AlignLog = TD->getPreferredAlignmentLog(GV);
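+ // AlignLog is a log2 value: e.g. AlignLog == 4 means 16-byte
+ // alignment, which is why it is emitted below as "1 << AlignLog".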
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (VerboseAsm) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ if (MAI->hasLCOMMDirective()) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
+ return;
+ }
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ if (VerboseAsm) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (VerboseAsm) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // Add a workaround for linkonce linkage on Cygwin/MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+ // FIXME: What is this?
+ O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n";
+
+ // Emit pre-function debug and/or EH information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->BeginFunction(MF);
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ if (!MI.getDebugLoc().isUnknown()) {
+ DILocation DLT = MF->getDILocation(MI.getDebugLoc());
+
+ // Print source line info.
+ DIScope Scope = DLT.getScope();
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (!Scope.isNull())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DLT.getLineNumber();
+ if (DLT.getColumnNumber() != 0)
+ CommentOS << ':' << DLT.getColumnNumber();
+ CommentOS << '\n';
+ }
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
+ SrcSubIdx, DstSubIdx)) {
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+ }
+}
+
+
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ if (!II->isLabel())
+ HasAnyRealCode = true;
+
+ ++EmittedInsts;
+
+ // FIXME: Clean up processDebugLoc.
+ processDebugLoc(II, true);
+
+ if (VerboseAsm)
+ EmitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ printLabelInst(II);
+ break;
+ case TargetOpcode::INLINEASM:
+ printInlineAsm(II);
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ printImplicitDef(II);
+ break;
+ case TargetOpcode::KILL:
+ printKill(II);
+ break;
+ default:
+ EmitInstruction(II);
+ break;
+ }
+
+ // FIXME: Clean up processDebugLoc.
+ processDebugLoc(II, false);
+ }
+ }
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a 0 byte.
+ if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)
+ OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/);
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n';
+
+ // Emit post-function debug information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->EndFunction(MF);
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit final debug information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->EndModule();
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
+ MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
+ MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = GetGlobalValueSymbol(I);
+
+ const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ MCSymbol *Target = GetGlobalValueSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(O, *this, *MAI);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ DW = 0; MMI = 0;
+
+ OutStreamer.Finish();
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = GetGlobalValueSymbol(MF.getFunction());
+
+ if (VerboseAsm)
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and constant pool entries for
+  // each section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries going
+  // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+    case 1: Kind = SectionKind::getReadOnlyWithRelLocal(); break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+      case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+    // The number of sections is small, so just do a linear search from the
+    // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
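+      // For example, if Offset is 5 and the entry needs 4-byte alignment,
+      // NewOffset is (5 + 3) & ~3 == 8, so three bytes of zero fill are
+      // emitted.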
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+
+ const Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+
+ // Emit the label with a comment on it.
+ if (VerboseAsm) {
+ OutStreamer.GetCommentOS() << "constant pool ";
+ WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
+ MF->getFunction()->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function
+      // is declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name, just any randomly
+ // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol(OutContext);
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+    if (MAI->hasNoDeadStrip())    // Without .no_dead_strip, nothing to emit.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - Mark each global in the specified llvm.used list for
+/// which shouldEmitUsedDirectiveFor returns true as no-dead-strip, so the
+/// linker will not discard it.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+ // Should be an array of 'i8*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(GV),
+ MCSA_NoDeadStrip);
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
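+  // For example (with a hypothetical constructor @init):
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //     [{ i32, void ()* } { i32 65535, void ()* @init }]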
+ if (!isa<ConstantArray>(List)) return;
+ ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+ OutStreamer.EmitIntValue(Value, 8, 0/*addrspace*/);
+}
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request. However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+// Align = NumBits;
+//     if (GV && GV->getAlignment()) Align = Log2_32(GV->getAlignment());
+// Align = std::max(Align, ForcedAlignBits);
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+ unsigned ForcedAlignBits,
+ bool UseFillExpr) const {
+ if (GV && GV->getAlignment())
+ NumBits = Log2_32(GV->getAlignment());
+ NumBits = std::max(NumBits, ForcedAlignBits);
+
+ if (NumBits == 0) return; // No need to emit alignment.
+
+ unsigned FillValue = 0;
+ if (getCurrentSection()->getKind().isText())
+ FillValue = MAI->getTextAlignFillValue();
+
+ OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0);
+}
+
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.GetGlobalValueSymbol(GV), Ctx);
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ return MCConstantExpr::Create(0, Ctx);
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+#ifndef NDEBUG
+ CE->dump();
+#endif
+ llvm_unreachable("FIXME: Don't support this constant expr");
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
+ IdxVec.size());
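+    // For example, a GEP into a hypothetical [4 x i32] global @x with
+    // indices 0 and 2 has Offset = 8 and lowers to the expression "x + 8".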
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
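+    // For example, with 32-bit pointers SExtAmount is 32, so an offset of
+    // 0xFFFFFFFF is sign-extended to -1.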
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
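+    // For example, a 32-bit pointer emitted into a 64-bit slot has
+    // InBits = 32, so the operand is ANDed with 0xFFFFFFFF.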
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (AddrSpace != 0 || !CA->isString()) {
+    // Not a string. Print the values in successive locations.
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace);
+ return;
+ }
+
+ // Otherwise, it can be emitted as .ascii.
+ SmallVector<char, 128> TmpVec;
+ TmpVec.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+ AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace);
+}
+
+static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
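+    // For example, an i8 field followed by an i32 field at offset 4 has
+    // FieldSize = 1 and PadSize = 3.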
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ AP.EmitGlobalConstant(Field, AddrSpace);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.VerboseAsm) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isFloatTy()) {
+ if (AP.VerboseAsm) {
+ float Val = CFP->getValueAPF().convertToFloat();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ }
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isX86_FP80Ty()) {
+    // All long double variants are printed as hex. The APInt temporary
+    // (API) is needed to keep the raw data alive until it is emitted.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.VerboseAsm) {
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
+ << DoubleVal.convertToDouble() << '\n';
+ }
+
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const TargetData &TD = *AP.TM.getTargetData();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+ return;
+ }
+
+ assert(CFP->getType()->isPPC_FP128Ty() &&
+ "Floating point constant type not handled");
+  // All long double variants are printed as hex. The APInt temporary (API)
+  // is needed to keep the raw data alive until it is emitted.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
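+  // For example, an i128 constant is emitted as two 8-byte values: the
+  // least-significant word first on little-endian targets, and the
+  // most-significant word first on big-endian targets.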
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+    // An empty "_foo:" label followed by a section boundary is ill-defined,
+    // so emit at least one byte.
+    if (Size == 0) Size = 1;
+ return OutStreamer.EmitZeros(Size, AddrSpace);
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (VerboseAsm)
+ OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ EmitGlobalConstantLargeInt(CI, AddrSpace, *this);
+ return;
+ }
+ }
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, *this);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, *this);
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return EmitGlobalConstantFP(CFP, AddrSpace, *this);
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, *this);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+  // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit
+  // it through the streamer with EmitValue.
+ OutStreamer.EmitValue(LowerConstant(CV, *this),
+ TM.getTargetData()->getTypeAllocSize(CV->getType()),
+ AddrSpace);
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
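+/// For example, "${:comment}" expands to the target's comment string when
+/// verbose assembly is enabled, and "${:uid}" expands to a counter that is
+/// bumped for each new instruction or function.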
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ O << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ if (VerboseAsm)
+ O << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+ const Function *ThisF = MI->getParent()->getParent()->getFunction();
+
+    // If this is a new machine instruction or function, bump the counter.
+ if (LastMI != MI || LastFn != ThisF) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = ThisF;
+ }
+ O << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ llvm_report_error(Msg.str());
+ }
+}
+
+/// processDebugLoc - Processes the debug information of each machine
+/// instruction's DebugLoc.
+void AsmPrinter::processDebugLoc(const MachineInstr *MI,
+ bool BeforePrintingInsn) {
+ if (!MAI || !DW || !MAI->doesSupportDebugInformation()
+ || !DW->ShouldEmitDwarfDebug())
+ return;
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL.isUnknown())
+ return;
+ DILocation CurDLT = MF->getDILocation(DL);
+ if (CurDLT.getScope().isNull())
+ return;
+
+ if (!BeforePrintingInsn) {
+ // After printing instruction
+ DW->EndScope(MI);
+ } else if (CurDLT.getNode() != PrevDLT) {
+ unsigned L = DW->RecordSourceLine(CurDLT.getLineNumber(),
+ CurDLT.getColumnNumber(),
+ CurDLT.getScope().getNode());
+ printLabel(L);
+ O << '\n';
+ DW->BeginScope(MI, L);
+ PrevDLT = CurDLT.getNode();
+ }
+}
+
+
+/// printInlineAsm - This method formats and prints the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-1 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ O << '\t';
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asm blocks wound up.
+ if (AsmStr[0] == 0) {
+ O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
+ O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n';
+ return;
+ }
+
+ O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t";
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
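+  // For example, in the hypothetical string "$(movl$|leal$) ...", variant 0
+  // emits "movl" and variant 1 emits "leal"; "$$" always emits a literal '$'.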
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+      O << '\n'; // Emit the newline in the output.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1) {
+ llvm_report_error("Nested variants found in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ O << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ O << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0) {
+ llvm_report_error("Unterminated ${:foo} operand in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ char *IDEnd;
+ errno = 0;
+ long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
+ if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
+ llvm_report_error("Bad $ operand number in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+      // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0) {
+ llvm_report_error("Bad ${:} expression in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}') {
+ llvm_report_error("Bad ${} expression in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if ((unsigned)Val >= NumOperands-1) {
+ llvm_report_error("Invalid $ operand number in inline asm string: '"
+ + std::string(AsmStr) + "'");
+ }
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = 1;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ O << *MI->getOperand(OpNo).getMBB()->getSymbol(OutContext);
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
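+          // The low three bits of the operand flags encode the operand
+          // kind; kind 4 is a memory operand.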
+ if ((OpFlags & 7) == 4) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n";
+ MI->print(Msg);
+ llvm_report_error(Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd();
+ OutStreamer.AddBlankLine();
+}
+
+/// printImplicitDef - This method prints the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
+ if (!VerboseAsm) return;
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " implicit-def: "
+ << TRI->getName(MI->getOperand(0).getReg());
+ OutStreamer.AddBlankLine();
+}
+
+void AsmPrinter::printKill(const MachineInstr *MI) const {
+ if (!VerboseAsm) return;
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " kill:";
+ for (unsigned n = 0, e = MI->getNumOperands(); n != e; ++n) {
+ const MachineOperand &op = MI->getOperand(n);
+ assert(op.isReg() && "KILL instruction must have only register operands");
+ O << ' ' << TRI->getName(op.getReg()) << (op.isDef() ? "<def>" : "<kill>");
+ }
+ OutStreamer.AddBlankLine();
+}
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabelInst(const MachineInstr *MI) const {
+ printLabel(MI->getOperand(0).getImm());
+ OutStreamer.AddBlankLine();
+}
+
+void AsmPrinter::printLabel(unsigned Id) const {
+ O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':';
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
+ const BasicBlock *BB) const {
+ assert(BB->hasName() &&
+ "Address of anonymous basic block not supported yet!");
+
+ // This code must use the function name itself, and not the function number,
+ // since it must be possible to generate the label name from within other
+ // functions.
+ SmallString<60> FnName;
+ Mang->getNameWithPrefix(FnName, F, false);
+
+ // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
+ SmallString<60> NameResult;
+ Mang->getNameWithPrefix(NameResult,
+ StringRef("BA") + Twine((unsigned)FnName.size()) +
+ "_" + FnName.str() + "_" + BB->getName(),
+ Mangler::Private);
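+  // For example, block "entry" of function "foo" produces a symbol of the
+  // form "BA3_foo_entry" (with target-dependent mangling and prefixes).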
+
+ return OutContext.GetOrCreateSymbol(NameResult.str());
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
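+/// For example, with a private prefix of "L", entry 2 of function 1 is
+/// named "LCPI1_2".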
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << '_' << CPID;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
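+/// For example, with a private prefix of "L", jump table 1 and block 3 in
+/// function 2 produce "L2_1_set_3".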
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << UID << "_set_" << MBBID;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+/// GetGlobalValueSymbol - Return the MCSymbol for the specified global
+/// value.
+MCSymbol *AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+  for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();
+       CL != E; ++CL) {
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// PrintBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void PrintBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+  // If this block is not a loop header, just print out which block is the
+  // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Log2_32(Align));
+
+ // If the block has its address taken, emit a special label to satisfy
+ // references to the block. This is done so that we don't need to
+ // remember the number of this label, and so that we can make
+ // forward references to labels without knowing what their numbers
+ // will be.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (VerboseAsm)
+ OutStreamer.AddComment("Address Taken");
+ OutStreamer.EmitLabel(GetBlockAddressSymbol(BB->getParent(), BB));
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) {
+ if (VerboseAsm) {
+ // NOTE: Want this comment at start of line.
+ O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+
+ PrintBasicBlockLoopComments(*MBB, LI, *this);
+ OutStreamer.AddBlankLine();
+ }
+ } else {
+ if (VerboseAsm) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ PrintBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ OutStreamer.EmitLabel(MBB->getSymbol(OutContext));
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ Attr = MAI->getHiddenVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+void AsmPrinter::printOffset(int64_t Offset) const {
+ if (Offset > 0)
+ O << '+' << Offset;
+ else if (Offset < 0)
+ O << Offset;
+}
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_iterator GCPI = GCMetadataPrinters.find(S);
+ if (GCPI != GCMetadataPrinters.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMetadataPrinters.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+ return 0;
+}
+
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..066aaab
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(LLVMAsmPrinter
+ AsmPrinter.cpp
+ DIE.cpp
+ DwarfDebug.cpp
+ DwarfException.cpp
+ DwarfLabel.cpp
+ DwarfPrinter.cpp
+ DwarfWriter.cpp
+ OcamlGCPrinter.cpp
+ )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 0000000..349e0ac
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,431 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
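+/// The output is a ULEB128-encoded tag, a children flag, one (attribute,
+/// form) pair per attribute, and a terminating 0,0 pair.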
+///
+void DIEAbbrev::Emit(const DwarfPrinter *DP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ DP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ DP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ DP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ DP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ DP->EmitULEB128(0, "EOM(1)");
+ DP->EmitULEB128(0, "EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::addSiblingOffset() {
+ DIEInteger *DI = new DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size
+ << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag());
+ } else {
+ O << "Size: " << Size;
+ }
+ O << "\n";
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_udata: D->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: D->EmitSLEB128(Integer, ""); return;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ default: llvm_unreachable("DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << format(" 0x%llx", (unsigned long long)Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ D->getAsm()->OutStreamer.EmitBytes(Str, /*addrspace*/0);
+ // Emit nul terminator.
+ D->getAsm()->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
+}
+
+#ifndef NDEBUG
+void DIEString::print(raw_ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDwarfLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEDwarfLabel::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Label, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDwarfLabel::print(raw_ostream &O) {
+ O << "Lbl: ";
+ Label.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEObjectLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEObjectLabel::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Sym, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEObjectLabel::print(raw_ostream &O) {
+ O << "Obj: " << Sym->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIESectionOffset Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitSectionOffset(Label.getTag(), Section.getTag(),
+ Label.getNumber(), Section.getNumber(),
+ IsSmall, IsEH, UseSet);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIESectionOffset::print(raw_ostream &O) {
+ O << "Off: ";
+ Label.print(O);
+ O << "-";
+ Section.print(O);
+ O << "-" << IsEH << "-" << UseSet;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitDifference(LabelHi, LabelLo, IsSmall);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: ";
+ LabelHi.print(O);
+ O << "-";
+ LabelLo.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ D->getAsm()->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(const TargetData *TD) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(DwarfPrinter *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ switch (Form) {
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: D->EmitULEB128(Size); break;
+ default: llvm_unreachable("Improper form for block"); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ Asm->O << '\n';
+ Values[i]->EmitValue(D, AbbrevData[i].getForm());
+ }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 0000000..af90289
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,494 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class DwarfPrinter;
+ class TargetData;
+ class MCSymbol;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+ /// Dwarf abbreviation.
+ class DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+ public:
+ DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 8> Data;
+
+ public:
+ DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ virtual ~DIEAbbrev() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(const DwarfPrinter *DP) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+ /// describes its organization.
+ class CompileUnit;
+ class DIEValue;
+
+ class DIE {
+ protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ DIE *Parent;
+
+ /// Attributes values.
+ ///
+ SmallVector<DIEValue*, 32> Values;
+
+ // Private data for print()
+ mutable unsigned IndentCount;
+ public:
+ explicit DIE(unsigned Tag)
+ : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+ Size(0), Parent (0), IndentCount(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ SmallVector<DIEValue*, 32> &getValues() { return Values; }
+ DIE *getParent() const { return Parent; }
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+ void setParent(DIE *P) { Parent = P; }
+
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned getSiblingOffset() const { return Offset + Size; }
+
+ /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+ ///
+ void addSiblingOffset();
+
+ /// addChild - Add a child to the DIE.
+ ///
+ void addChild(DIE *Child) {
+ if (Child->getParent()) {
+ assert (Child->getParent() == this && "Unexpected DIE Parent!");
+ return;
+ }
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ Child->setParent(this);
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class DIEValue {
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isAsIsLabel,
+ isSectionOffset,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
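+ /// For example, BestForm(false, 0x12) yields DW_FORM_data1 and
+ /// BestForm(false, 0x1234) yields DW_FORM_data2; signed values are
+ /// compared after truncation and sign extension, so BestForm(true, -1)
+ /// also fits in DW_FORM_data1.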
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
+ if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
+ if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEString - A string value DIE. This DIE keeps only a string reference.
+ ///
+ class DIEString : public DIEValue {
+ const StringRef Str;
+ public:
+ explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const {
+ return Str.size() + sizeof(char); // sizeof('\0');
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDwarfLabel - A Dwarf internal label expression DIE.
+ //
+ class DIEDwarfLabel : public DIEValue {
+ const DWLabel Label;
+ public:
+ explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDwarfLabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEObjectLabel - A label to an object in code or data.
+ //
+ class DIEObjectLabel : public DIEValue {
+ const MCSymbol *Sym;
+ public:
+ explicit DIEObjectLabel(const MCSymbol *S)
+ : DIEValue(isAsIsLabel), Sym(S) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEObjectLabel *) { return true; }
+ static bool classof(const DIEValue *L) {
+ return L->getType() == isAsIsLabel;
+ }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIESectionOffset - A section offset DIE.
+ ///
+ class DIESectionOffset : public DIEValue {
+ const DWLabel Label;
+ const DWLabel Section;
+ bool IsEH : 1;
+ bool UseSet : 1;
+ public:
+ DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
+ bool isEH = false, bool useSet = true)
+ : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
+ IsEH(isEH), UseSet(useSet) {}
+
+ /// EmitValue - Emit section offset.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of section offset value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIESectionOffset *) { return true; }
+ static bool classof(const DIEValue *D) {
+ return D->getType() == isSectionOffset;
+ }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class DIEDelta : public DIEValue {
+ const DWLabel LabelHi;
+ const DWLabel LabelLo;
+ public:
+ DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (i.e. types).
+ class DIEEntry : public DIEValue {
+ DIE *Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+ void setEntry(DIE *E) { Entry = E; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - calculate the size of the block.
+ ///
+ unsigned ComputeSize(const TargetData *TD);
+
+ /// BestForm - Choose the best form for data.
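+ /// For example, a 200-byte block fits DW_FORM_block1 (one-byte length
+ /// prefix), while a 70000-byte block needs DW_FORM_block4.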
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..5093dd9
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,3011 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ DIE *CUDie;
+
+ /// IndexTyDie - An anonymous type for index type.
+ DIE *IndexTyDie;
+
+ /// GVToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ /// FIXME : Rename GVToDieMap -> NodeToDieMap
+ DenseMap<MDNode *, DIE *> GVToDieMap;
+
+ /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ /// FIXME : Rename
+ DenseMap<MDNode *, DIEEntry *> GVToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+public:
+ CompileUnit(unsigned I, DIE *D)
+ : ID(I), CUDie(D), IndexTyDie(0) {}
+ ~CompileUnit() { delete CUDie; delete IndexTyDie; }
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie; }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(const std::string &Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
+ }
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(MDNode *N, DIE *D) {
+ GVToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(MDNode *N) {
+ DenseMap<MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
+ if (I == GVToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(MDNode *N, DIEEntry *E) {
+ GVToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ unsigned FrameIndex; // Variable frame index.
+ DbgVariable *AbstractVar; // Abstract variable for this variable.
+ DIE *TheDIE;
+public:
+ DbgVariable(DIVariable V, unsigned I)
+ : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+ DbgVariable *getAbstractVariable() const { return AbstractVar; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgScope {
+ DbgScope *Parent; // Parent to this scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ // Location at which this scope is inlined.
+ AssertingVH<MDNode> InlinedAtLocation;
+ bool AbstractScope; // Abstract Scope
+ unsigned StartLabelID; // Label ID of the beginning of scope.
+ unsigned EndLabelID; // Label ID of the end of scope.
+ const MachineInstr *LastInsn; // Last instruction of this scope.
+ const MachineInstr *FirstInsn; // First instruction of this scope.
+ SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
+ SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
+
+ // Private state for dump()
+ mutable unsigned IndentLevel;
+public:
+ DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ StartLabelID(0), EndLabelID(0),
+ LastInsn(0), FirstInsn(0), IndentLevel(0) {}
+ virtual ~DbgScope();
+
+ // Accessors.
+ DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
+ DIDescriptor getDesc() const { return Desc; }
+ MDNode *getInlinedAt() const {
+ return InlinedAtLocation;
+ }
+ MDNode *getScopeNode() const { return Desc.getNode(); }
+ unsigned getStartLabelID() const { return StartLabelID; }
+ unsigned getEndLabelID() const { return EndLabelID; }
+ SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+ SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ void setStartLabelID(unsigned S) { StartLabelID = S; }
+ void setEndLabelID(unsigned E) { EndLabelID = E; }
+ void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
+ const MachineInstr *getLastInsn() { return LastInsn; }
+ void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
+ const MachineInstr *getFirstInsn() { return FirstInsn; }
+
+ /// addScope - Add a scope to the scope.
+ ///
+ void addScope(DbgScope *S) { Scopes.push_back(S); }
+
+ /// addVariable - Add a variable to the scope.
+ ///
+ void addVariable(DbgVariable *V) { Variables.push_back(V); }
+
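+ /// fixInstructionMarkers - Propagate the latest last-instruction marker
+ /// from the child scopes upward, so that this scope's range covers all of
+ /// its children.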
+ void fixInstructionMarkers(DenseMap<const MachineInstr *,
+ unsigned> &MIIndexMap) {
+ assert (getFirstInsn() && "First instruction is missing!");
+
+ // Use the end of last child scope as end of this scope.
+ SmallVector<DbgScope *, 4> &Scopes = getScopes();
+ const MachineInstr *LastInsn = getFirstInsn();
+ unsigned LIndex = 0;
+ if (Scopes.empty()) {
+ assert (getLastInsn() && "Innermost scope does not have last insn!");
+ return;
+ }
+ for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+ SE = Scopes.end(); SI != SE; ++SI) {
+ DbgScope *DS = *SI;
+ DS->fixInstructionMarkers(MIIndexMap);
+ const MachineInstr *DSLastInsn = DS->getLastInsn();
+ unsigned DSI = MIIndexMap[DSLastInsn];
+ if (DSI > LIndex) {
+ LastInsn = DSLastInsn;
+ LIndex = DSI;
+ }
+ }
+ setLastInsn(LastInsn);
+ }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+ raw_ostream &err = dbgs();
+ err.indent(IndentLevel);
+ MDNode *N = Desc.getNode();
+ N->dump();
+ err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+ if (AbstractScope)
+ err << "Abstract Scope\n";
+
+ IndentLevel += 2;
+ if (!Scopes.empty())
+ err << "Children ...\n";
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+ if (Scopes[i] != this)
+ Scopes[i]->dump();
+
+ IndentLevel -= 2;
+}
+#endif
+
+DbgScope::~DbgScope() {
+ for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
+ delete Scopes[i];
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+ delete Variables[j];
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
+ : DwarfPrinter(OS, A, T, "dbg"), ModuleCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
+ DIEValues(), StringPool(),
+ SectionSourceLines(), didInitial(false), shouldEmit(false),
+ CurrentFnDbgScope(0), DebugTimer(0) {
+ if (TimePassesIsEnabled)
+ DebugTimer = new Timer("Dwarf Debug Writer");
+}
+DwarfDebug::~DwarfDebug() {
+ for (unsigned j = 0, M = DIEValues.size(); j < M; ++j)
+ delete DIEValues[j];
+
+ delete DebugTimer;
+}
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
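+/// Abbreviation numbers are 1-based positions in the Abbreviations list;
+/// DWARF reserves abbreviation code 0 to mark null entries, so numbering
+/// starts at 1.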
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new DIEEntry(Entry);
+ DIEValues.push_back(Value);
+ return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = new DIEInteger(Integer);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new DIEInteger(Integer);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps a string reference.
+void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
+ StringRef String) {
+ DIEValue *Value = new DIEString(String);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label) {
+ DIEValue *Value = new DIEDwarfLabel(Label);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addObjectLabel - Add a non-Dwarf label attribute data and value.
+///
+void DwarfDebug::addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Sym) {
+ DIEValue *Value = new DIEObjectLabel(Sym);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSectionOffset - Add a section offset label attribute data and value.
+///
+void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH, bool useSet) {
+ DIEValue *Value = new DIESectionOffset(Label, Section, isEH, useSet);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo) {
+ DIEValue *Value = new DIEDelta(Hi, Lo);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addBlock - Add block data.
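+/// Note that the Form argument is unused here; the block's own BestForm(),
+/// chosen from its computed size, is used instead.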
+///
+void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(TD);
+ DIEValues.push_back(Block);
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
+ // If there is no compile unit specified, don't add a line #.
+ if (V->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = V->getLineNumber();
+ unsigned FileID = findCompileUnit(V->getCompileUnit())->getID();
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
+ // If there is no compile unit specified, don't add a line #.
+ if (G->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = G->getLineNumber();
+ unsigned FileID = findCompileUnit(G->getCompileUnit())->getID();
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
+ // If there is no compile unit specified, don't add a line #.
+ if (SP->getCompileUnit().isNull())
+ return;
+ // If the line number is 0, don't add it.
+ if (SP->getLineNumber() == 0)
+ return;
+
+ unsigned Line = SP->getLineNumber();
+ unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID();
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
+ // If there is no compile unit specified, don't add a line #.
+ DICompileUnit CU = Ty->getCompileUnit();
+ if (CU.isNull())
+ return;
+
+ unsigned Line = Ty->getLineNumber();
+ unsigned FileID = findCompileUnit(CU)->getID();
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
+ // If there is no compile unit specified, don't add a line #.
+ if (NS->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = NS->getLineNumber();
+ StringRef FN = NS->getFilename();
+ StringRef Dir = NS->getDirectory();
+
+ unsigned FileID = GetOrCreateSourceID(Dir, FN);
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+
+/// Find the type the programmer originally declared the variable to be
+/// and return that type.
+///
+DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
+
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType.getNode());
+
+ DIArray Elements = blockStruct.getTypeArray();
+
+ if (Elements.isNull())
+ return Ty;
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element.getNode());
+ if (Name == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+
+ return Ty;
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location. Add the DWARF information to the die.
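+/// For example (with illustrative values), a variable located at [reg5 + 8]
+/// whose DIVariable carries the extra elements {OpPlus, 4} produces the
+/// expression: DW_OP_breg5 8, DW_OP_plus_uconst 4.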
+///
+void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ const DIVariable &VD = DV->getVariable();
+ DIType Ty = VD.getType();
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ } else {
+ Reg = Reg - dwarf::DW_OP_reg0;
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
+ uint64_t Element = VD.getAddrElement(i);
+
+ if (Element == DIFactory::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ } else if (Element == DIFactory::OpDeref) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIFactory Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read large comment just above here.
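+/// For example (with illustrative offsets): if the pointer to the byref
+/// struct lives at [fp - 16], __forwarding is at byte offset 8 and the
+/// variable field is at byte offset 16, the emitted expression is
+/// DW_OP_breg<fp> -16, DW_OP_deref, DW_OP_plus_uconst 8, DW_OP_deref,
+/// DW_OP_plus_uconst 16.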
+///
+void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ const DIVariable &VD = DV->getVariable();
+ DIType Ty = VD.getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = VD.getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy.getNode());
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element.getNode());
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ assert(!varField.isNull() && "Can't find byref variable in Block struct");
+ assert(!forwardingField.isNull()
+ && "Can't find forwarding field in Block struct");
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned int forwardingFieldOffset =
+ DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
+ unsigned int varFieldOffset =
+ DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ else {
+ Reg = Reg - dwarf::DW_OP_reg0;
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ } else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ } else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (Context.isNull())
+ ModuleCU->addDie(Die);
+ else if (Context.isType()) {
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
+ ContextDIE->addChild(Die);
+ } else if (Context.isNameSpace()) {
+ DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode()));
+ ContextDIE->addChild(Die);
+ } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode()))
+ ContextDIE->addChild(Die);
+ else
+ ModuleCU->addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
+ DIE *TyDIE = ModuleCU->getDIE(Ty.getNode());
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
+ ModuleCU->insertDIE(Ty.getNode(), TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode()));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode()));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode()));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void DwarfDebug::addType(DIE *Entity, DIType Ty) {
+ if (Ty.isNull())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode());
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Set up proxy.
+ Entry = createDIEEntry();
+ ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ Entry->setEntry(Buffer);
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ // Add source line info if available and DTy is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, &DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIEnumerator Enum(Elements.getElement(i).getNode());
+ if (!Enum.isNull()) {
+ ElemDie = constructEnumTypeDIE(&Enum);
+ Buffer.addChild(ElemDie);
+ }
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy.getNode()));
+
+ // Add prototype flag.
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIDescriptor Ty = Elements.getElement(i);
+ addType(Arg, DIType(Ty.getNode()));
+ Buffer.addChild(Arg);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward struct declared type may not have elements available.
+ if (Elements.isNull())
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.isNull())
+ continue;
+ DIE *ElemDie = NULL;
+ if (Element.getTag() == dwarf::DW_TAG_subprogram)
+ ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
+ else if (Element.getTag() == dwarf::DW_TAG_auto_variable) {
+ DIVariable DV(Element.getNode());
+ ElemDie = new DIE(dwarf::DW_TAG_variable);
+ addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ DV.getName());
+ addType(ElemDie, DV.getType());
+ addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(ElemDie, &DV);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (!ContainingType.isNull())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType.getNode())));
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, &CTy);
+ }
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
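+/// For example, a C array dimension of 10 arrives as the subrange 0..9 and
+/// emits DW_AT_upper_bound 9; DW_AT_lower_bound is omitted when it is zero.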
+void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = ModuleCU->getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ ModuleCU->addDie(IdxTy);
+ ModuleCU->setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy->getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy->getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// getRealLinkageName - If the linkage name begins with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix
+/// before the symbol name, return the linkage name with that prefix stripped.
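+/// For example, "\1_ZN3fooEv" becomes "_ZN3fooEv".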
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// createGlobalVariableDIE - Create new DIE using GV.
+DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
+ // If the global variable was optimized out then there is no need to create
+ // a debug info entry.
+ if (!GV.getGlobal()) return NULL;
+ if (GV.getDisplayName().empty()) return NULL;
+
+ DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
+ addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty())
+ addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+ addType(GVDie, GV.getType());
+ if (!GV.isLocalToUnit())
+ addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(GVDie, &GV);
+
+ return GVDie;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, &DT);
+
+ DIEBlock *MemLocationDie = new DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
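+ // Worked example (illustrative): a 1-bit field at bit offset 17 within a
+ // 32-bit, 32-bit-aligned storage unit gives HiMark = (17 + 32) & ~31 = 32,
+ // so FieldOffset = 0 and Offset stays 17; on little-endian targets it is
+ // flipped below to 32 - (17 + 1) = 14.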
+
+ // Maybe we need to work from the other end.
+ if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable.
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+ return MemberDie;
+}
+
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+ DIE *SPDie = ModuleCU->getDIE(SP.getNode());
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
+
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+ addSourceLine(SPDie, &SP);
+
+ // Add prototyped tag, if C or ObjC.
+ unsigned Lang = SP.getCompileUnit().getLanguage();
+ if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
+ Lang == dwarf::DW_LANG_ObjC)
+ addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0).getNode()));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ DIEBlock *Block = new DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType().getNode()));
+ }
+
+ if (MakeDecl || !SP.isDefinition()) {
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy(Args.getElement(i).getNode());
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ ModuleCU->insertDIE(SP.getNode(), SPDie);
+ return SPDie;
+}
+
+/// findCompileUnit - Get the compile unit for the given descriptor.
+///
+CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) {
+ DenseMap<Value *, CompileUnit *>::const_iterator I =
+ CompileUnitMap.find(Unit.getNode());
+ if (I == CompileUnitMap.end())
+ return constructCompileUnit(Unit.getNode());
+ return I->second;
+}
+
+/// getUpdatedDbgScope - Find or create DbgScope associated with the instruction.
+/// Initialize scope and update scope hierarchy.
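+/// For example (hypothetical functions): if foo() is inlined into bar(),
+/// the scope for an instruction in foo's inlined body is keyed by its
+/// InlinedAt location, and the recursive call below chains in bar's scope
+/// as the parent (and so on if bar was itself inlined).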
+DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+ MDNode *InlinedAt) {
+ assert (N && "Invalid Scope encoding!");
+ assert (MI && "Missing machine instruction!");
+ bool GetConcreteScope = (MI && InlinedAt);
+
+ DbgScope *NScope = NULL;
+
+ if (InlinedAt)
+ NScope = DbgScopeMap.lookup(InlinedAt);
+ else
+ NScope = DbgScopeMap.lookup(N);
+ assert (NScope && "Unable to find working scope!");
+
+ if (NScope->getFirstInsn())
+ return NScope;
+
+ DbgScope *Parent = NULL;
+ if (GetConcreteScope) {
+ DILocation IL(InlinedAt);
+ Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
+ IL.getOrigLocation().getNode());
+ assert (Parent && "Unable to find Parent scope!");
+ NScope->setParent(Parent);
+ Parent->addScope(NScope);
+ } else if (DIDescriptor(N).isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ if (!DB.getContext().isNull()) {
+ Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
+ NScope->setParent(Parent);
+ Parent->addScope(NScope);
+ }
+ }
+
+ NScope->setFirstInsn(MI);
+
+ if (!Parent && !InlinedAt) {
+ StringRef SPName = DISubprogram(N).getLinkageName();
+ if (SPName == MF->getFunction()->getName())
+ CurrentFnDbgScope = NScope;
+ }
+
+ if (GetConcreteScope) {
+ ConcreteScopes[InlinedAt] = NScope;
+ getOrCreateAbstractScope(N);
+ }
+
+ return NScope;
+}
+
+DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+ assert (N && "Invalid Scope encoding!");
+
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
+ DbgScope *Parent = NULL;
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getOrCreateAbstractScope(ParentDesc.getNode());
+ }
+
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
+
+ if (Parent)
+ Parent->addScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
+
+ DIE *SPDie = ModuleCU->getDIE(SPNode);
+ assert (SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+ // There is no need to generate a specification DIE for a function
+ // defined at compile unit level. If a function is defined inside another
+ // function then gdb prefers the definition at the top level and does not
+ // expect a specification DIE in the parent function. So avoid creating a
+ // specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit()
+ && !SP.getContext().isSubprogram()) {
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy(Args.getElement(i).getNode());
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+ ModuleCU->addDie(SPDie);
+ }
+
+ addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ if (!DISubprogram(SPNode).isLocalToUnit())
+ addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartID ?
+ DWLabel("label", StartID)
+ : DWLabel("func_begin", SubprogramCount));
+ addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndID ?
+ DWLabel("label", EndID)
+ : DWLabel("func_end", SubprogramCount));
+
+ return ScopeDIE;
+}
+
+/// constructInlinedScopeDIE - This scope represents inlined body of
+/// a function. Construct DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ assert (StartID && "Invalid starting label for an inlined scope!");
+ assert (EndID && "Invalid end label for an inlined scope!");
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
+
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
+ assert (OriginDIE && "Unable to find Origin DIE!");
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+ // Track the start label for this inlined function.
+ DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP.getNode());
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID,
+ ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP.getNode());
+ } else
+ I->second.push_back(std::make_pair(StartID, ScopeDIE));
+
+ StringPool.insert(InlinedSP.getName());
+ StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
+
+ DILocation DL(Scope->getInlinedAt());
+ addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
+}
+
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+ StringRef Name = VD.getName();
+ if (Name.empty())
+ return NULL;
+
+ // Translate the tag to the proper DWARF tag; return variables are
+ // dropped for now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall through
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+
+ DIE *AbsDIE = NULL;
+ if (DbgVariable *AV = DV->getAbstractVariable())
+ AbsDIE = AV->getDIE();
+
+ if (AbsDIE) {
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
+ (void) OriginSPDIE;
+ assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+ // AbsDIE is known to be non-NULL here; no need to re-fetch it.
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ } else {
+ addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (VD.isBlockByrefVariable())
+ addType(VariableDie, getBlockByrefType(VD.getType(), Name));
+ else
+ addType(VariableDie, VD.getType());
+ }
+
+ // Add variable address.
+ if (!Scope->isAbstractScope()) {
+ MachineLocation Location;
+ unsigned FrameReg;
+ int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+ Location.set(FrameReg, Offset);
+
+ if (VD.hasComplexAddress())
+ addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ addAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+
+ if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ DV->setDIE(VariableDie);
+ return VariableDie;
+}
+
+void DwarfDebug::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
+ if (Args.isNull())
+ return;
+
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i).getNode());
+ if (ATy.isNull())
+ continue;
+ DICompositeType CATy = getDICompositeType(ATy);
+ if (!CATy.isNull() && !CATy.getName().empty()) {
+ if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
+ ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
+ }
+ }
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
+ if (!Scope)
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ if (Scope->isAbstractScope())
+ ScopeDIE = ModuleCU->getDIE(DS.getNode());
+ else
+ ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
+ } else {
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ if (!ScopeDIE) return NULL;
+ }
+
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DIE *NestedDIE = constructScopeDIE(Scopes[j]);
+ if (NestedDIE)
+ ScopeDIE->addChild(NestedDIE);
+ }
+
+ if (DS.isSubprogram())
+ addPubTypes(DISubprogram(DS.getNode()));
+
+ return ScopeDIE;
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
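+///
+/// For example (illustrative names only): ("/tmp", "a.c") and
+/// ("/tmp", "b.c") share one directory id but get distinct file and
+/// source ids, while a repeated ("/tmp", "a.c") query returns the
+/// previously assigned source id.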
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName,
+ StringRef FileName) {
+ unsigned DId;
+ StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
+ if (DI != DirectoryIdMap.end()) {
+ DId = DI->getValue();
+ } else {
+ DId = DirectoryNames.size() + 1;
+ DirectoryIdMap[DirName] = DId;
+ DirectoryNames.push_back(DirName);
+ }
+
+ unsigned FId;
+ StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
+ if (FI != SourceFileIdMap.end()) {
+ FId = FI->getValue();
+ } else {
+ FId = SourceFileNames.size() + 1;
+ SourceFileIdMap[FileName] = FId;
+ SourceFileNames.push_back(FileName);
+ }
+
+ DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
+ SourceIdMap.find(std::make_pair(DId, FId));
+ if (SI != SourceIdMap.end())
+ return SI->second;
+
+ unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0.
+ SourceIdMap[std::make_pair(DId, FId)] = SrcId;
+ SourceIds.push_back(std::make_pair(DId, FId));
+
+ return SrcId;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = ModuleCU->getDIE(NS.getNode());
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ ModuleCU->insertDIE(NS.getNode(), NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, &NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(Dir, FN);
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ DWLabel("section_line", 0), DWLabel("section_line", 0),
+ false);
+ addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer());
+ addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+ DIUnit.getLanguage());
+ addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+
+ if (!Dir.empty())
+ addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ CompileUnit *Unit = new CompileUnit(ID, Die);
+ if (!ModuleCU && DIUnit.isMain()) {
+ // Use first compile unit marked as isMain as the compile unit
+ // for this module.
+ ModuleCU = Unit;
+ }
+
+ CompileUnitMap[DIUnit.getNode()] = Unit;
+ CompileUnits.push_back(Unit);
+ return Unit;
+}
+
+void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
+ DIGlobalVariable DI_GV(N);
+
+ // If debug information is malformed then ignore it.
+ if (!DI_GV.Verify())
+ return;
+
+ // Check for pre-existence.
+ if (ModuleCU->getDIE(DI_GV.getNode()))
+ return;
+
+ DIE *VariableDie = createGlobalVariableDIE(DI_GV);
+ if (!VariableDie)
+ return;
+
+ // Add to map.
+ ModuleCU->insertDIE(N, VariableDie);
+
+ // Add to context owner.
+ DIDescriptor GVContext = DI_GV.getContext();
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
+ && !GVContext.isSubprogram()) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDie);
+ DIEBlock *Block = new DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ ModuleCU->addDie(VariableSpecDIE);
+ } else {
+ DIEBlock *Block = new DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+ addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
+ }
+ addToContextOwner(VariableDie, GVContext);
+
+ // Expose as global. FIXME - need to check external flag.
+ ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+
+ DIType GTy = DI_GV.getType();
+ if (GTy.isCompositeType() && !GTy.getName().empty()) {
+ DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+ assert (Entry && "Missing global type!");
+ ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+ }
+}
+
+void DwarfDebug::constructSubprogramDIE(MDNode *N) {
+ DISubprogram SP(N);
+
+ // Check for pre-existence.
+ if (ModuleCU->getDIE(N))
+ return;
+
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return;
+
+ DIE *SubprogramDie = createSubprogramDIE(SP);
+
+ // Add to map.
+ ModuleCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global.
+ ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
+ this->M = M;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ if (!MAI->doesSupportDebugInformation())
+ return;
+
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
+
+ // Create all the compile unit DIEs.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
+ constructCompileUnit(*I);
+
+ if (CompileUnits.empty()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ // If the main compile unit for this module was not seen then fall back
+ // to the first compile unit.
+ if (!ModuleCU)
+ ModuleCU = CompileUnits[0];
+
+ // Create DIEs for each subprogram.
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I)
+ constructSubprogramDIE(*I);
+
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I)
+ constructGlobalVariableDIE(*I);
+
+ MMI = mmi;
+ shouldEmit = true;
+ MMI->setDebugInfoAvailability(true);
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+
+ // Print out .file directives to specify files for .loc directives. These are
+ // printed out early so that they precede any .loc directives.
+ if (MAI->hasDotLocAndDotFile()) {
+ for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
+ // FIXME: don't use sys::path for this! This should not depend on the
+ // host.
+ sys::Path FullPath(getSourceDirectoryName(Id.first));
+ bool AppendOk =
+ FullPath.appendComponent(getSourceFileName(Id.second));
+ assert(AppendOk && "Could not append filename to directory!");
+ (void) AppendOk; // Silence unused variable warning in -Asserts builds.
+ Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
+ }
+ }
+
+ // Emit initial sections
+ emitInitial();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+ if (!ModuleCU)
+ return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
+ // Insert top level DIEs.
+ for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(),
+ TE = TopLevelDIEsVector.end(); TI != TE; ++TI)
+ ModuleCU->getCUDie()->addChild(*TI);
+
+ for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ if (!N) continue;
+ DIE *NDie = ModuleCU->getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ // FIXME - This is not the correct approach.
+ // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+
+ // Standard sections final addresses.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ EmitLabel("text_end", 0);
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ EmitLabel("data_end", 0);
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->OutStreamer.SwitchSection(SectionMap[i]);
+ EmitLabel("section_end", i);
+ }
+
+ // Emit common frame information.
+ emitCommonDebugFrame();
+
+ // Emit function debug frame information
+ for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+ E = DebugFrames.end(); I != E; ++I)
+ emitFunctionDebugFrame(*I);
+
+ // Compute DIE offsets and sizes.
+ computeSizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ emitDebugInfo();
+
+ // Corresponding abbreviations into a abbrev section.
+ emitAbbreviations();
+
+ // Emit source line correspondence into a debug line section.
+ emitDebugLines();
+
+ // Emit info into a debug pubnames section.
+ emitDebugPubNames();
+
+ // Emit info into a debug pubtypes section.
+ emitDebugPubTypes();
+
+ // Emit info into a debug str section.
+ emitDebugStr();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ emitDebugInlineInfo();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+ unsigned FrameIdx,
+ DILocation &ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+ Scope->addVariable(AbsDbgVariable);
+ AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo() {
+ if (!MMI) return;
+
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ MDNode *Var = VI->first;
+ if (!Var) continue;
+ DIVariable DV(Var);
+ std::pair<unsigned, MDNode *> VP = VI->second;
+ DILocation ScopeLoc(VP.second);
+
+ DbgScope *Scope =
+ ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
+ if (!Scope)
+ Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+ Scope->addVariable(RegVar);
+ if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first,
+ ScopeLoc))
+ RegVar->setAbstractVariable(AbsDbgVariable);
+ }
+}
+
+/// beginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::beginScope(const MachineInstr *MI, unsigned Label) {
+ InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
+ if (I == DbgScopeBeginMap.end())
+ return;
+ ScopeVector &SD = I->second;
+ for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
+ SDI != SDE; ++SDI)
+ (*SDI)->setStartLabelID(Label);
+}
+
+/// endScope - Process end of a scope.
+void DwarfDebug::endScope(const MachineInstr *MI) {
+ InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
+ if (I == DbgScopeEndMap.end())
+ return;
+
+ unsigned Label = MMI->NextLabelID();
+ Asm->printLabel(Label);
+ O << '\n';
+
+ SmallVector<DbgScope *, 2> &SD = I->second;
+ for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ SDI != SDE; ++SDI)
+ (*SDI)->setEndLabelID(Label);
+}
+
+/// createDbgScope - Create DbgScope for the scope.
+void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock())
+ createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ return;
+ }
+
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ DILocation DL(InlinedAt);
+ createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
+}
+
+/// extractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Returns true if at least one scope was found.
+bool DwarfDebug::extractScopeInformation() {
+ // If scope information was extracted using .dbg intrinsics then there is
+ // no need to extract this information again by scanning each instruction.
+ if (!DbgScopeMap.empty())
+ return false;
+
+ DenseMap<const MachineInstr *, unsigned> MIIndexMap;
+ unsigned MIIndex = 0;
+ // Scan each instruction and create scopes. First build working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ MIIndexMap[MInsn] = MIIndex++;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ DILocation DLT = MF->getDILocation(DL);
+ DIScope DLTScope = DLT.getScope();
+ if (DLTScope.isNull()) continue;
+ // There is no need to create another DIE for compile unit. For all
+ // other scopes, create one DbgScope now. This will be translated
+ // into a scope DIE at the end.
+ if (DLTScope.isCompileUnit()) continue;
+ createDbgScope(DLTScope.getNode(), DLT.getOrigLocation().getNode());
+ }
+ }
+
+ // Build scope hierarchy using working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ DILocation DLT = MF->getDILocation(DL);
+ DIScope DLTScope = DLT.getScope();
+ if (DLTScope.isNull()) continue;
+ // Ignore compile unit scopes. For all other scopes, look up the
+ // DbgScope created in the first pass and link it into the hierarchy.
+ DbgScope *Scope = getUpdatedDbgScope(DLTScope.getNode(), MInsn,
+ DLT.getOrigLocation().getNode());
+ Scope->setLastInsn(MInsn);
+ }
+ }
+
+ if (!CurrentFnDbgScope)
+ return false;
+
+ CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
+
+ // Each scope has a first and a last instruction marking where the scope
+ // begins and ends. Create an inverse map listing, for each instruction,
+ // the scopes that start (or end) at it. One instruction may start (or
+ // end) multiple scopes. Ignore scopes that are not reachable.
+ SmallVector<DbgScope *, 4> WorkList;
+ WorkList.push_back(CurrentFnDbgScope);
+ while (!WorkList.empty()) {
+ DbgScope *S = WorkList.back(); WorkList.pop_back();
+
+ SmallVector<DbgScope *, 4> &Children = S->getScopes();
+ if (!Children.empty())
+ for (SmallVector<DbgScope *, 4>::iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+ const MachineInstr *MI = S->getFirstInsn();
+ assert (MI && "DbgScope does not have first instruction!");
+
+ DbgScopeBeginMap[MI].push_back(S);
+
+ MI = S->getLastInsn();
+ assert (MI && "DbgScope does not have last instruction!");
+ DbgScopeEndMap[MI].push_back(S);
+ }
+
+ return !DbgScopeMap.empty();
+}
+
+/// beginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ this->MF = MF;
+
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ if (!extractScopeInformation()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+ return;
+ }
+
+ collectVariableInfo();
+
+ // Assumes we are in the correct section after emitting the entry point.
+ EmitLabel("func_begin", ++SubprogramCount);
+
+ // Emit label for the implicitly defined dbg.stoppoint at the start of the
+ // function.
+ DebugLoc FDL = MF->getDefaultDebugLoc();
+ if (!FDL.isUnknown()) {
+ DILocation DLT = MF->getDILocation(FDL);
+ unsigned LabelID = 0;
+ DISubprogram SP = getDISubprogram(DLT.getScope().getNode());
+ if (!SP.isNull())
+ LabelID = recordSourceLine(SP.getLineNumber(), 0,
+ DLT.getScope().getNode());
+ else
+ LabelID = recordSourceLine(DLT.getLineNumber(),
+ DLT.getColumnNumber(),
+ DLT.getScope().getNode());
+ Asm->printLabel(LabelID);
+ O << '\n';
+ }
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ if (DbgScopeMap.empty()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+ return;
+ }
+
+ if (CurrentFnDbgScope) {
+ // Define end label for subprogram.
+ EmitLabel("func_end", SubprogramCount);
+
+ // Get function line info.
+ if (!Lines.empty()) {
+ // Get section line info.
+ unsigned ID = SectionMap.insert(Asm->getCurrentSection());
+ if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+ std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+ // Append the function info to section info.
+ SectionLineInfos.insert(SectionLineInfos.end(),
+ Lines.begin(), Lines.end());
+ }
+
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ constructScopeDIE(*AI);
+
+ constructScopeDIE(CurrentFnDbgScope);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+ MMI->getFrameMoves()));
+ }
+
+ // Clear debug info
+ CurrentFnDbgScope = NULL;
+ DbgScopeMap.clear();
+ DbgScopeBeginMap.clear();
+ DbgScopeEndMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
+ Lines.clear();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// recordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+ MDNode *S) {
+ if (!MMI)
+ return 0;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ StringRef Dir;
+ StringRef Fn;
+
+ DIDescriptor Scope(S);
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Dir = CU.getDirectory();
+ Fn = CU.getFilename();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Dir = SP.getDirectory();
+ Fn = SP.getFilename();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Dir = DB.getDirectory();
+ Fn = DB.getFilename();
+ } else
+ assert (0 && "Unexpected scope info");
+
+ unsigned Src = GetOrCreateSourceID(Dir, Fn);
+ unsigned ID = MMI->NextLabelID();
+ Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+/// timed. Look up the source id with the given directory and source file
+/// names. If none currently exists, create a new id and insert it in the
+/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
+/// well.
+unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ unsigned SrcId = GetOrCreateSourceID(DirName, FileName);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return SrcId;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
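+/// For example (hypothetical sizes): a DIE whose abbreviation code takes
+/// 1 byte, with attribute values of 4 and 2 bytes and one 7-byte child,
+/// occupies 1 + 4 + 2 + 7 + 1 (end-of-children marker) = 15 bytes.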
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If not last sibling and has children then add sibling offset attribute.
+ if (!Last && !Children.empty()) Die->addSiblingOffset();
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+ // Compute size of compile unit header.
+ static unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
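+ // With 4-byte lengths/offsets this is 4 + 2 + 4 + 1 = 11 bytes, so the
+ // first DIE lands at offset 11 within the compile unit.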
+
+ computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
+ CompileUnitOffsets[ModuleCU] = 0;
+}
+
+/// emitInitial - Emit initial Dwarf declarations. This is necessary for cc
+/// tools to recognize the object file contains Dwarf information.
+void DwarfDebug::emitInitial() {
+ // Check to see if we already emitted initial headers.
+ if (didInitial) return;
+ didInitial = true;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ if (MAI->doesDwarfRequireFrameSection()) {
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection());
+ EmitLabel("section_debug_frame", 0);
+ }
+
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection());
+ EmitLabel("section_info", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection());
+ EmitLabel("section_abbrev", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection());
+ EmitLabel("section_aranges", 0);
+
+ if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) {
+ Asm->OutStreamer.SwitchSection(LineInfoDirective);
+ EmitLabel("section_macinfo", 0);
+ }
+
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection());
+ EmitLabel("section_line", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection());
+ EmitLabel("section_loc", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection());
+ EmitLabel("section_pubnames", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubTypesSection());
+ EmitLabel("section_pubtypes", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection());
+ EmitLabel("section_str", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection());
+ EmitLabel("section_ranges", 0);
+
+ Asm->OutStreamer.SwitchSection(TLOF.getTextSection());
+ EmitLabel("text_begin", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDataSection());
+ EmitLabel("data_begin", 0);
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ Asm->O << '\n';
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->VerboseAsm)
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ EmitULEB128(AbbrevNumber);
+
+ SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->VerboseAsm)
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->getSiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ O << "\n"; // REMOVE This once all EmitValue impls emit their own newline.
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j]);
+
+ Asm->EmitInt8(0); EOL("End Of Children Mark");
+ }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+ // Start debug info section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
+ DIE *Die = ModuleCU->getCUDie();
+
+ // Emit the compile units header.
+ EmitLabel("info_begin", ModuleCU->getID());
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int32_t); // FIXME - extra pad for gdb bug.
+
+ Asm->EmitInt32(ContentSize); EOL("Length of Compilation Unit Info");
+ Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF version number");
+ EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+ EOL("Offset Into Abbrev. Section");
+ Asm->EmitInt8(TD->getPointerSize()); EOL("Address Size (in bytes)");
+
+ emitDIE(Die);
+ // FIXME - extra padding for gdb bug.
+ Asm->EmitInt8(0); EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); EOL("Extra Pad For GDB");
+ EmitLabel("info_end", ModuleCU->getID());
+ Asm->O << '\n';
+}
+
+/// emitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::emitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAbbrevSection());
+
+ EmitLabel("abbrev_begin", 0);
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation code (base 1 index).
+ EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(this);
+ Asm->O << '\n';
+ }
+
+ // Mark end of abbreviations.
+ EmitULEB128(0, "EOM(3)");
+
+ EmitLabel("abbrev_end", 0);
+ Asm->O << '\n';
+ }
+}
+
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->EmitInt8(0); EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); EOL("DW_LNE_set_address");
+ EmitReference("section_end", SectionEnd); EOL("Section end label");
+
+ // Mark end of matrix.
+ Asm->EmitInt8(0); EOL("DW_LNE_end_sequence");
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
+
+/// emitDebugLines - Emit source line information.
+///
+void DwarfDebug::emitDebugLines() {
+ // If the target is using .loc/.file, the assembler will be emitting the
+ // .debug_line table automatically.
+ if (MAI->hasDotLocAndDotFile())
+ return;
+
+ // Minimum line delta of the special-opcode range, i.e. -10.
+ const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
+ // Maximum line delta of the special-opcode range, i.e. 255 - 10 = 245.
+ const int MaxLineDelta = 255 + MinLineDelta;
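+
+ // Illustrative: with opcode base 10 and line base -10 (emitted below), a
+ // line advance of +1 with no address advance is encoded as the single
+ // special opcode (1 - (-10)) + 10 = 21, matching the Delta - MinLineDelta
+ // emission in the loop below.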
+
+ // Start the dwarf line section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLineSection());
+
+ // Construct the section header.
+ EmitDifference("line_end", 0, "line_begin", 0, true);
+ EOL("Length of Source Line Info");
+ EmitLabel("line_begin", 0);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF version number");
+
+ EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+ EOL("Prolog Length");
+ EmitLabel("line_prolog_begin", 0);
+
+ Asm->EmitInt8(1); EOL("Minimum Instruction Length");
+ Asm->EmitInt8(1); EOL("Default is_stmt_start flag");
+ Asm->EmitInt8(MinLineDelta); EOL("Line Base Value (Special Opcodes)");
+ Asm->EmitInt8(MaxLineDelta); EOL("Line Range Value (Special Opcodes)");
+ Asm->EmitInt8(-MinLineDelta); EOL("Special Opcode Base");
+
+ // Line number standard opcode encodings argument count
+ Asm->EmitInt8(0); EOL("DW_LNS_copy arg count");
+ Asm->EmitInt8(1); EOL("DW_LNS_advance_pc arg count");
+ Asm->EmitInt8(1); EOL("DW_LNS_advance_line arg count");
+ Asm->EmitInt8(1); EOL("DW_LNS_set_file arg count");
+ Asm->EmitInt8(1); EOL("DW_LNS_set_column arg count");
+ Asm->EmitInt8(0); EOL("DW_LNS_negate_stmt arg count");
+ Asm->EmitInt8(0); EOL("DW_LNS_set_basic_block arg count");
+ Asm->EmitInt8(0); EOL("DW_LNS_const_add_pc arg count");
+ Asm->EmitInt8(1); EOL("DW_LNS_fixed_advance_pc arg count");
+
+ // Emit directories.
+ for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
+ const std::string &Dir = getSourceDirectoryName(DI);
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("Directory");
+ Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0);
+ }
+
+ Asm->EmitInt8(0); EOL("End of directories");
+
+ // Emit files.
+ for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
+ const std::string &FN = getSourceFileName(Id.second);
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("Source");
+ Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
+
+ EmitULEB128(Id.first, "Directory #");
+ EmitULEB128(0, "Mod date");
+ EmitULEB128(0, "File size");
+ }
+
+ Asm->EmitInt8(0); EOL("End of files");
+
+ EmitLabel("line_prolog_end", 0);
+
+ // A sequence for each text section.
+ unsigned SecSrcLinesSize = SectionSourceLines.size();
+
+ for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
+ // Isolate current sections line info.
+ const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
+
+ /*if (Asm->isVerbose()) {
+ const MCSection *S = SectionMap[j + 1];
+ O << '\t' << MAI->getCommentString() << " Section"
+ << S->getName() << '\n';
+ }*/
+ Asm->O << '\n';
+
+ // Dwarf assumes we start with first line of first source file.
+ unsigned Source = 1;
+ unsigned Line = 1;
+
+ // Construct rows of the address, source, line, column matrix.
+ for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+ const SrcLineInfo &LineInfo = LineInfos[i];
+ unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+ if (!LabelID) continue;
+
+ if (LineInfo.getLine() == 0) continue;
+
+ if (!Asm->isVerbose())
+ Asm->O << '\n';
+ else {
+ std::pair<unsigned, unsigned> SourceID =
+ getSourceDirectoryAndFileIds(LineInfo.getSourceID());
+ O << '\t' << MAI->getCommentString() << ' '
+ << getSourceDirectoryName(SourceID.first) << '/'
+ << getSourceFileName(SourceID.second)
+ << ':' << utostr_32(LineInfo.getLine()) << '\n';
+ }
+
+ // Define the line address.
+ Asm->EmitInt8(0); EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); EOL("DW_LNE_set_address");
+ EmitReference("label", LabelID); EOL("Location label");
+
+ // If change of source, then switch to the new source.
+ if (Source != LineInfo.getSourceID()) {
+ Source = LineInfo.getSourceID();
+ Asm->EmitInt8(dwarf::DW_LNS_set_file); EOL("DW_LNS_set_file");
+ EmitULEB128(Source, "New Source");
+ }
+
+ // If change of line.
+ if (Line != LineInfo.getLine()) {
+ // Determine offset.
+ int Offset = LineInfo.getLine() - Line;
+ int Delta = Offset - MinLineDelta;
+
+ // Update line.
+ Line = LineInfo.getLine();
+
+ // If delta is small enough and in range...
+ if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
+ // ... then use fast opcode.
+ Asm->EmitInt8(Delta - MinLineDelta); EOL("Line Delta");
+ } else {
+ // ... otherwise use long hand.
+ Asm->EmitInt8(dwarf::DW_LNS_advance_line);
+ EOL("DW_LNS_advance_line");
+ EmitSLEB128(Offset, "Line Offset");
+ Asm->EmitInt8(dwarf::DW_LNS_copy); EOL("DW_LNS_copy");
+ }
+ } else {
+ // Copy the previous row (different address or source)
+ Asm->EmitInt8(dwarf::DW_LNS_copy); EOL("DW_LNS_copy");
+ }
+ }
+
+ emitEndOfLineMatrix(j + 1);
+ }
+
+ if (SecSrcLinesSize == 0)
+ // Because we're emitting a debug_line section, we still need a line
+ // table. The linker and friends expect it to exist. If there's nothing to
+ // put into it, emit an empty table.
+ emitEndOfLineMatrix(1);
+
+ EmitLabel("line_end", 0);
+ Asm->O << '\n';
+}
+
+/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
+///
+void DwarfDebug::emitCommonDebugFrame() {
+ if (!MAI->doesDwarfRequireFrameSection())
+ return;
+
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
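+ // For instance, on a target whose stack grows down with 8-byte pointers,
+ // stackGrowth is -8; it becomes the CIE data alignment factor below.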
+
+ // Start the dwarf frame section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfFrameSection());
+
+ EmitLabel("debug_frame_common", 0);
+ EmitDifference("debug_frame_common_end", 0,
+ "debug_frame_common_begin", 0, true);
+ EOL("Length of Common Information Entry");
+
+ EmitLabel("debug_frame_common_begin", 0);
+ Asm->EmitInt32((int)dwarf::DW_CIE_ID);
+ EOL("CIE Identifier Tag");
+ Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+ EOL("CIE Version");
+ Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator.
+ EOL("CIE Augmentation");
+ EmitULEB128(1, "CIE Code Alignment Factor");
+ EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
+ EOL("CIE RA Column");
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+
+ EmitFrameMoves(NULL, 0, Moves, false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_common_end", 0);
+ Asm->O << '\n';
+}
+
+/// emitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// section.
+void
+DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+ if (!MAI->doesDwarfRequireFrameSection())
+ return;
+
+ // Start the dwarf frame section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfFrameSection());
+
+ EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+ "debug_frame_begin", DebugFrameInfo.Number, true);
+ EOL("Length of Frame Information Entry");
+
+ EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+ EmitSectionOffset("debug_frame_common", "section_debug_frame",
+ 0, 0, true, false);
+ EOL("FDE CIE offset");
+
+ EmitReference("func_begin", DebugFrameInfo.Number);
+ EOL("FDE initial location");
+ EmitDifference("func_end", DebugFrameInfo.Number,
+ "func_begin", DebugFrameInfo.Number);
+ EOL("FDE address range");
+
+ EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
+ false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+ Asm->O << '\n';
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ EmitDifference("pubnames_end", ModuleCU->getID(),
+ "pubnames_begin", ModuleCU->getID(), true);
+ EOL("Length of Public Names Info");
+
+ EmitLabel("pubnames_begin", ModuleCU->getID());
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF Version");
+
+ EmitSectionOffset("info_begin", "section_info",
+ ModuleCU->getID(), 0, true, false);
+ EOL("Offset of Compilation Unit Info");
+
+ EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
+ true);
+ EOL("Compilation Unit Length");
+
+ const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ Asm->EmitInt32(Entity->getOffset()); EOL("DIE offset");
+
+ if (Asm->VerboseAsm)
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->EmitInt32(0); EOL("End Mark");
+ EmitLabel("pubnames_end", ModuleCU->getID());
+ Asm->O << '\n';
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ EmitDifference("pubtypes_end", ModuleCU->getID(),
+ "pubtypes_begin", ModuleCU->getID(), true);
+ EOL("Length of Public Types Info");
+
+ EmitLabel("pubtypes_begin", ModuleCU->getID());
+
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ EmitSectionOffset("info_begin", "section_info",
+ ModuleCU->getID(), 0, true, false);
+ EOL("Offset of Compilation ModuleCU Info");
+
+ EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
+ true);
+ EOL("Compilation ModuleCU Length");
+
+ const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->EmitInt32(0); EOL("End Mark");
+ EmitLabel("pubtypes_end", ModuleCU->getID());
+ Asm->O << '\n';
+}
+
+/// emitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (!StringPool.empty()) {
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfStrSection());
+
+ // For each of strings in the string pool.
+ for (unsigned StringID = 1, N = StringPool.size();
+ StringID <= N; ++StringID) {
+ // Emit a label for reference from debug information entries.
+ EmitLabel("string", StringID);
+
+ // Emit the string itself.
+ const std::string &String = StringPool[StringID];
+ Asm->OutStreamer.EmitBytes(StringRef(String.c_str(), String.size()+1), 0);
+ }
+
+ Asm->O << '\n';
+ }
+}
+
+/// emitDebugLoc - Emit visible names into a debug loc section.
+///
+void DwarfDebug::emitDebugLoc() {
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+}
+
+/// EmitDebugARanges - Emit visible names into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+
+ // FIXME - Mock up
+#if 0
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Don't include size of length
+ Asm->EmitInt32(0x1c); EOL("Length of Address Ranges Info");
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("Dwarf Version");
+
+ EmitReference("info_begin", Unit->getID());
+ EOL("Offset of Compilation Unit Info");
+
+ Asm->EmitInt8(TD->getPointerSize()); EOL("Size of Address");
+
+ Asm->EmitInt8(0); EOL("Size of Segment Descriptor");
+
+ Asm->EmitInt16(0); EOL("Pad (1)");
+ Asm->EmitInt16(0); EOL("Pad (2)");
+
+ // Range 1
+ EmitReference("text_begin", 0); EOL("Address");
+ EmitDifference("text_end", 0, "text_begin", 0, true); EOL("Length");
+
+ Asm->EmitInt32(0); EOL("EOM (1)");
+ Asm->EmitInt32(0); EOL("EOM (2)");
+#endif
+}
+
+/// emitDebugRanges - Emit visible names into a debug ranges section.
+///
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+}
+
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+///
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+/// emitDebugInlineInfo - Emit inline info using following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!MAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!ModuleCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ EmitDifference("debug_inlined_end", 1,
+ "debug_inlined_begin", 1, true);
+ EOL("Length of Debug Inlined Information Entry");
+
+ EmitLabel("debug_inlined_begin", 1);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("Dwarf Version");
+ Asm->EmitInt8(TD->getPointerSize()); EOL("Address Size (in bytes)");
+
+ for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ MDNode *Node = *I;
+ DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ if (LName.empty()) {
+ Asm->OutStreamer.EmitBytes(Name, 0);
+ Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
+ } else
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(getRealLinkageName(LName)), false, true);
+
+ EOL("MIPS linkage name");
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(Name), false, true);
+ EOL("Function name");
+ EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ DIE *SP = LI->second;
+ Asm->EmitInt32(SP->getOffset()); EOL("DIE offset");
+
+ if (TD->getPointerSize() == sizeof(int32_t))
+ O << MAI->getData32bitsDirective();
+ else
+ O << MAI->getData64bitsDirective();
+
+ PrintLabelName("label", LI->first); EOL("low_pc");
+ }
+ }
+
+ EmitLabel("debug_inlined_end", 1);
+ Asm->O << '\n';
+}
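+
+// A hypothetical consumer's view of one debug_inlined entry emitted above.
+// This is an illustrative sketch under the format comment's assumptions
+// (4-byte string-section offsets, pointer-sized low_pc values); it is not an
+// LLVM API:
+//
+//   struct InlinedInstance { uint32_t DieOffset; uintptr_t LowPC; };
+//   struct InlinedEntry {
+//     uint32_t LinkageNameOffset;  // into __debug_str (or the name emitted
+//                                  // inline when there is no linkage name)
+//     uint32_t FunctionNameOffset; // into __debug_str
+//     uint64_t InstanceCount;      // stored as ULEB128 on disk
+//     std::vector<InlinedInstance> Instances;
+//   };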
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 0000000..55baa92
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,566 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+
+namespace llvm {
+
+class CompileUnit;
+class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MCAsmInfo;
+class Timer;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ unsigned LabelID; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
+ : Line(L), Column(C), SourceID(S), LabelID(I) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ unsigned getLabelID() const { return LabelID; }
+};
+
+class DwarfDebug : public DwarfPrinter {
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+  /// CompileUnitMap - A map from the global variables representing compile
+  /// units to the corresponding CompileUnit objects.
+ DenseMap<Value *, CompileUnit *> CompileUnitMap;
+
+ /// CompileUnits - All the compile units in this module.
+ ///
+ SmallVector<CompileUnit *, 8> CompileUnits;
+
+ /// ModuleCU - All DIEs are inserted in ModuleCU.
+ CompileUnit *ModuleCU;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// DirectoryIdMap - Directory name to directory id map.
+ ///
+ StringMap<unsigned> DirectoryIdMap;
+
+ /// DirectoryNames - A list of directory names.
+ SmallVector<std::string, 8> DirectoryNames;
+
+ /// SourceFileIdMap - Source file name to source file id map.
+ ///
+ StringMap<unsigned> SourceFileIdMap;
+
+ /// SourceFileNames - A list of source file names.
+ SmallVector<std::string, 8> SourceFileNames;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+ /// SourceIds - Reverse map from source id to directory id + file id pair.
+ ///
+ SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
+ /// Lines - List of source line correspondence.
+ std::vector<SrcLineInfo> Lines;
+
+ /// DIEValues - A list of all the unique values in use.
+ ///
+ std::vector<DIEValue *> DIEValues;
+
+ /// StringPool - A UniqueVector of strings used by indirect references.
+ ///
+ UniqueVector<std::string> StringPool;
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const MCSection*> SectionMap;
+
+ /// SectionSourceLines - Tracks line numbers per text section.
+ ///
+ std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+ /// didInitial - Flag to indicate if initial emission has been done.
+ ///
+ bool didInitial;
+
+ /// shouldEmit - Flag to indicate if debug information should be emitted.
+ ///
+ bool shouldEmit;
+
+  /// CurrentFnDbgScope - Top level scope for the current function.
+  ///
+ DbgScope *CurrentFnDbgScope;
+
+ /// DbgScopeMap - Tracks the scopes in the current function.
+ ///
+ DenseMap<MDNode *, DbgScope *> DbgScopeMap;
+
+  /// ConcreteScopes - Tracks the concrete scopes in the current function.
+ /// These scopes are also included in DbgScopeMap.
+ DenseMap<MDNode *, DbgScope *> ConcreteScopes;
+
+  /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+  /// are not included in DbgScopeMap.
+ DenseMap<MDNode *, DbgScope *> AbstractScopes;
+  SmallVector<DbgScope *, 4> AbstractScopesList;
+
+  /// AbstractVariables - Collection of abstract variables.
+ DenseMap<MDNode *, DbgVariable *> AbstractVariables;
+
+  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ DenseMap<DIE *, MDNode *> ContainingTypeMap;
+
+  /// AbstractSubprogramDIEs - Collection of abstract subprogram DIEs.
+ SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
+
+ /// TopLevelDIEs - Collection of top level DIEs.
+ SmallPtrSet<DIE *, 4> TopLevelDIEs;
+ SmallVector<DIE *, 4> TopLevelDIEsVector;
+
+ typedef SmallVector<DbgScope *, 2> ScopeVector;
+ typedef DenseMap<const MachineInstr *, ScopeVector>
+ InsnToDbgScopeMapTy;
+
+  /// DbgScopeBeginMap - Maps an instruction to the DbgScopes it starts.
+ InsnToDbgScopeMapTy DbgScopeBeginMap;
+
+  /// DbgScopeEndMap - Maps an instruction to the DbgScopes it ends.
+ InsnToDbgScopeMapTy DbgScopeEndMap;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+  /// information is used to populate the debug_inlined section.
+ typedef std::pair<unsigned, DIE *> InlineInfoLabels;
+ DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<MDNode *, 4> InlinedSPNodes;
+
+  /// CompileUnitOffsets - A map from compile units to their offsets. This is
+ /// used when calculating the "origin" of a concrete instance of an inlined
+ /// function.
+ DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+
+ /// DebugTimer - Timer for the Dwarf debug writer.
+ Timer *DebugTimer;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+  /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+  /// map to the given source id. Source ids start at 1.
+ std::pair<unsigned, unsigned>
+ getSourceDirectoryAndFileIds(unsigned SId) const {
+ return SourceIds[SId-1];
+ }
+
+ /// getNumSourceDirectories - Return the number of source directories in the
+ /// debug info.
+ unsigned getNumSourceDirectories() const {
+ return DirectoryNames.size();
+ }
+
+ /// getSourceDirectoryName - Return the name of the directory corresponding
+ /// to the id.
+ const std::string &getSourceDirectoryName(unsigned Id) const {
+ return DirectoryNames[Id - 1];
+ }
+
+ /// getSourceFileName - Return the name of the source file corresponding
+ /// to the id.
+ const std::string &getSourceFileName(unsigned Id) const {
+ return SourceFileNames[Id - 1];
+ }
+
+ /// getNumSourceIds - Return the number of unique source ids.
+ unsigned getNumSourceIds() const {
+ return SourceIds.size();
+ }
+
+ /// assignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry = NULL);
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label);
+
+  /// addObjectLabel - Add a non-Dwarf label attribute data and value.
+ ///
+ void addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Sym);
+
+ /// addSectionOffset - Add a section offset label attribute data and value.
+ ///
+ void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH = false, bool useSet = true);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+ }
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, const DIVariable *V);
+ void addSourceLine(DIE *Die, const DIGlobal *G);
+ void addSourceLine(DIE *Die, const DISubprogram *SP);
+ void addSourceLine(DIE *Die, const DIType *Ty);
+ void addSourceLine(DIE *Die, const DINameSpace *NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(DIType Ty);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator *ETy);
+
+ /// createGlobalVariableDIE - Create new DIE using GV.
+ DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(const DIDerivedType &DT);
+
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
+
+ /// findCompileUnit - Get the compile unit for the given descriptor.
+ ///
+ CompileUnit *findCompileUnit(DICompileUnit Unit);
+
+  /// getUpdatedDbgScope - Find or create the DbgScope associated with
+ /// the instruction. Initialize scope and update scope hierarchy.
+  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+                               MDNode *InlinedAt);
+
+ /// createDbgScope - Create DbgScope for the scope.
+ void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+
+ DbgScope *getOrCreateAbstractScope(MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &Loc);
+
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+ /// If there are global variables in this scope then create and insert
+ /// DIEs for these variables.
+ DIE *updateSubprogramScopeDIE(MDNode *SPNode);
+
+ /// constructLexicalScope - Construct new DW_TAG_lexical_block
+ /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(DbgScope *Scope);
+
+ /// constructInlinedScopeDIE - This scope represents inlined body of
+ /// a function. Construct DIE to represent this concrete inlined copy
+ /// of the function.
+ DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+
+ /// constructScopeDIE - Construct a DIE for this scope.
+ DIE *constructScopeDIE(DbgScope *Scope);
+
+ /// emitInitial - Emit initial Dwarf declarations. This is necessary for cc
+  /// tools to recognize that the object file contains Dwarf information.
+ void emitInitial();
+
+  /// emitDIE - Recursively emits a debug information entry.
+ ///
+ void emitDIE(DIE *Die);
+
+ /// computeSizeAndOffset - Compute the size and offset of a DIE.
+ ///
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+
+ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void computeSizeAndOffsets();
+
+  /// emitDebugInfo - Emit the debug info section.
+ ///
+ void emitDebugInfo();
+
+ /// emitAbbreviations - Emit the abbreviation section.
+ ///
+ void emitAbbreviations() const;
+
+ /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// emitDebugLines - Emit source line information.
+ ///
+ void emitDebugLines();
+
+ /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
+ ///
+ void emitCommonDebugFrame();
+
+  /// emitFunctionDebugFrame - Emit per-function frame info into a debug frame
+ /// section.
+ void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
+ /// emitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void emitDebugPubNames();
+
+ /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+ ///
+ void emitDebugPubTypes();
+
+ /// emitDebugStr - Emit visible names into a debug str section.
+ ///
+ void emitDebugStr();
+
+ /// emitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void emitDebugLoc();
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// emitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void emitDebugRanges();
+
+ /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void emitDebugMacInfo();
+
+ /// emitDebugInlineInfo - Emit inline info using following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+ /// 1. offset into __debug_str section for MIPS linkage name, if exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void emitDebugInlineInfo();
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+  /// in the SourceIds map. This can update the DirectoryNames and
+  /// SourceFileNames maps as well.
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
+
+ CompileUnit *constructCompileUnit(MDNode *N);
+
+ void constructGlobalVariableDIE(MDNode *N);
+
+ void constructSubprogramDIE(MDNode *N);
+
+ // FIXME: This should go away in favor of complex addresses.
+ /// Find the type the programmer originally declared the variable to be
+ /// and return that type. Obsolete, use GetComplexAddrType instead.
+ ///
+ DIType getBlockByrefType(DIType Ty, std::string Name);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
+ virtual ~DwarfDebug();
+
+ /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+ /// be emitted.
+ bool ShouldEmitDwarfDebug() const { return shouldEmit; }
+
+ /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule(Module *M, MachineModuleInfo *MMI);
+
+ /// endModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void endModule();
+
+  /// beginFunction - Gather pre-function debug information. Assumes it is
+  /// emitted immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF);
+
+ /// endFunction - Gather and emit post-function debug information.
+ ///
+ void endFunction(const MachineFunction *MF);
+
+ /// recordSourceLine - Records location information and associates it with a
+ /// label. Returns a unique label ID used to generate a label and provide
+ /// correspondence to the source line list.
+ unsigned recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+
+ /// getSourceLineCount - Return the number of source lines in the debug
+ /// info.
+ unsigned getSourceLineCount() const {
+ return Lines.size();
+ }
+
+ /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+ /// timed. Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
+ /// well.
+ unsigned getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName);
+
+ /// extractScopeInformation - Scan machine instructions in this function
+  /// and collect DbgScopes. Return true if at least one scope was found.
+ bool extractScopeInformation();
+
+ /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ void collectVariableInfo();
+
+  /// beginScope - Process the beginning of a scope starting at Label.
+ void beginScope(const MachineInstr *MI, unsigned Label);
+
+  /// endScope - Process the end of a scope.
+ void endScope(const MachineInstr *MI);
+};
+} // End of namespace llvm
+
+#endif
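+
+// Expected driver sequence for this interface. A minimal sketch of how the
+// AsmPrinter side is assumed to call into DwarfDebug; the object name and
+// loop structure here are illustrative, not actual caller code:
+//
+//   DwarfDebug DD(OS, Printer, AsmInfo);
+//   DD.beginModule(M, MMI);
+//   for each MachineFunction MF:
+//     DD.beginFunction(MF);      // collect scopes and variables
+//     ... emit code, calling recordSourceLine() at location changes ...
+//     DD.endFunction(MF);        // construct and attach scope DIEs
+//   DD.endModule();              // emit the debug_* sections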
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 0000000..b6801dc
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,1021 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A,
+ const MCAsmInfo *T)
+  : DwarfPrinter(OS, A, T, "eh"), shouldEmitTable(false),
+    shouldEmitMoves(false), shouldEmitTableModule(false),
+    shouldEmitMovesModule(false),
+ ExceptionTimer(0) {
+ if (TimePassesIsEnabled)
+ ExceptionTimer = new Timer("DWARF Exception Writer");
+}
+
+DwarfException::~DwarfException() {
+ delete ExceptionTimer;
+}
+
+/// SizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ case dwarf::DW_EH_PE_absptr:
+ return TD->getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ return 8;
+ }
+
+ assert(0 && "Invalid encoded value.");
+ return 0;
+}
+
+/// CreateLabelDiff - Emit a label and subtract it from the expression we
+/// already have. This is equivalent to emitting "foo - .", but we have to emit
+/// the label for "." directly.
+const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
+ const char *LabelName,
+ unsigned Index) {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << LabelName << Asm->getFunctionNumber()
+ << "_" << Index;
+ MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ Asm->OutStreamer.EmitLabel(DotSym);
+
+ return MCBinaryExpr::CreateSub(ExprRef,
+ MCSymbolRefExpr::Create(DotSym,
+ Asm->OutContext),
+ Asm->OutContext);
+}
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
+/// is shared among many Frame Description Entries. There is at least one CIE
+/// in every non-empty .debug_frame section.
+void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Begin eh frame section.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ if (MAI->is_EHSymbolPrivate())
+ O << MAI->getPrivateGlobalPrefix();
+ O << "EH_frame" << Index << ":\n";
+
+ EmitLabel("section_eh_frame", Index);
+
+ // Define base labels.
+ EmitLabel("eh_frame_common", Index);
+
+ // Define the eh frame length.
+ EmitDifference("eh_frame_common_end", Index,
+ "eh_frame_common_begin", Index, true);
+ EOL("Length of Common Information Entry");
+
+ // EH frame header.
+ EmitLabel("eh_frame_common_begin", Index);
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("CIE Identifier Tag");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+ Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
+
+ // The personality presence indicates that language specific information will
+ // show up in the eh frame. Find out how we are supposed to lower the
+ // personality function reference:
+ const MCExpr *PersonalityRef = 0;
+ bool IsPersonalityIndirect = false, IsPersonalityPCRel = false;
+ if (PersonalityFn) {
+ // FIXME: HANDLE STATIC CODEGEN MODEL HERE.
+
+ // In non-static mode, ask the object file how to represent this reference.
+ PersonalityRef =
+ TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang,
+ Asm->MMI,
+ IsPersonalityIndirect,
+ IsPersonalityPCRel);
+ }
+
+ unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (IsPersonalityIndirect)
+ PerEncoding |= dwarf::DW_EH_PE_indirect;
+ unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ char Augmentation[6] = { 0 };
+ unsigned AugmentationSize = 0;
+ char *APtr = Augmentation + 1;
+
+ if (PersonalityRef) {
+ // There is a personality function.
+ *APtr++ = 'P';
+ AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding);
+ }
+
+ if (UsesLSDA[Index]) {
+ // An LSDA pointer is in the FDE augmentation.
+ *APtr++ = 'L';
+ ++AugmentationSize;
+ }
+
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+ // A non-default pointer encoding for the FDE.
+ *APtr++ = 'R';
+ ++AugmentationSize;
+ }
+
+ if (APtr != Augmentation + 1)
+ Augmentation[0] = 'z';
+
+ Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
+ EOL("CIE Augmentation");
+
+ // Round out reader.
+ EmitULEB128(1, "CIE Code Alignment Factor");
+ EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+ EOL("CIE Return Address Column");
+
+ EmitULEB128(AugmentationSize, "Augmentation Size");
+ EmitEncodingByte(PerEncoding, "Personality");
+
+ // If there is a personality, we need to indicate the function's location.
+ if (PersonalityRef) {
+ if (!IsPersonalityPCRel)
+ PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr",
+ Index);
+
+ O << MAI->getData32bitsDirective() << *PersonalityRef;
+ EOL("Personality");
+
+ EmitEncodingByte(LSDAEncoding, "LSDA");
+ EmitEncodingByte(FDEEncoding, "FDE");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ EmitFrameMoves(NULL, 0, Moves, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it must
+ // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+ // holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false);
+ EmitLabel("eh_frame_common_end", Index);
+ Asm->O << '\n';
+}
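+
+// Worked example for the augmentation bookkeeping above (illustrative,
+// assuming the pcrel|sdata4 encodings chosen in this function): with a
+// personality, an LSDA and a non-absptr FDE encoding, Augmentation becomes
+// "zPLR" and AugmentationSize is 7: one byte for the personality encoding,
+// four bytes for the sdata4 personality reference, and one byte each for the
+// 'L' and 'R' encoding bytes.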
+
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+ "Should not emit 'available externally' functions at all");
+
+ const Function *TheFunc = EHFrameInfo.function;
+
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection());
+
+  // Externally visible entry into the function's eh frame info. If the
+ // corresponding function is static, this should not be externally visible.
+ if (!TheFunc->hasLocalLinkage())
+ if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
+ O << GlobalEHDirective << *EHFrameInfo.FunctionEHSym << '\n';
+
+  // If the corresponding function is a weak definition, this should be too.
+ if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
+ O << MAI->getWeakDefDirective() << *EHFrameInfo.FunctionEHSym << '\n';
+
+  // If the corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (MCSymbolAttr HiddenAttr = MAI->getHiddenVisibilityAttr())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ HiddenAttr);
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+ // info is to be available for non-EH uses.
+ if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory &&
+ (!TheFunc->isWeakForLinker() ||
+ !MAI->getWeakDefDirective() ||
+ MAI->getSupportsWeakOmittedEHFrame())) {
+ O << *EHFrameInfo.FunctionEHSym << " = 0\n";
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ } else {
+ O << *EHFrameInfo.FunctionEHSym << ":\n";
+
+ // EH frame header.
+ EmitDifference("eh_frame_end", EHFrameInfo.Number,
+ "eh_frame_begin", EHFrameInfo.Number, true);
+ EOL("Length of Frame Information Entry");
+
+ EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+ EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+ EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+ true, true, false);
+
+ EOL("FDE CIE offset");
+
+ EmitReference("eh_func_begin", EHFrameInfo.Number, true, true);
+ EOL("FDE initial location");
+ EmitDifference("eh_func_end", EHFrameInfo.Number,
+ "eh_func_begin", EHFrameInfo.Number, true);
+ EOL("FDE address range");
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalities()[0] != NULL) {
+
+ if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) {
+ EmitULEB128(4, "Augmentation size");
+
+ if (EHFrameInfo.hasLandingPads)
+ EmitReference("exception", EHFrameInfo.Number, true, true);
+ else
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ } else {
+ EmitULEB128(TD->getPointerSize(), "Augmentation size");
+
+ if (EHFrameInfo.hasLandingPads) {
+ EmitReference("exception", EHFrameInfo.Number, true, false);
+ } else {
+ Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(),
+ 0/*addrspace*/);
+ }
+ }
+
+ EOL("Language Specific Data Area");
+ } else {
+ EmitULEB128(0, "Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in frame.
+ EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
+ true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it
+ // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+ // get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+ 0, 0, false);
+ EmitLabel("eh_frame_end", EHFrameInfo.Number);
+
+ // If the function is marked used, this table should be also. We cannot
+ // make the mark unconditional in this case, since retaining the table also
+ // retains the function in this case, and there is code around that depends
+ // on unused functions (calling undefined externals) being dead-stripped to
+ // link correctly. Yes, there really is.
+ if (MMI->isUsedFunction(EHFrameInfo.function))
+ if (MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ }
+ Asm->O << '\n';
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
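+
+// For example (illustrative): with L->TypeIds = {1, 2, 3} and
+// R->TypeIds = {1, 2, 7}, SharedTypeIds returns 2, the length of the common
+// prefix. ComputeActionsTable below uses this count to reuse the tail of the
+// previous landing pad's action chain.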
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
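+  //
+  // For example (illustrative): with FilterIds = {1, 2, 130} the loop below
+  // records FilterOffsets = {-1, -2, -3}. Since 130 needs two ULEB128 bytes,
+  // a fourth entry would land at offset -5 rather than -4, which is the case
+  // where byte offset and type ID diverge.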
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ const unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+    // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+    // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to the total size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
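+
+// The sizing arithmetic above leans on LEB128 widths. A minimal sketch of
+// the unsigned case, matching what MCAsmInfo::getULEB128Size is used for
+// here (the signed SLEB128 variant is analogous); illustrative only, not
+// LLVM's implementation:
+//
+//   static unsigned ULEB128Size(uint64_t Value) {
+//     unsigned Size = 0;
+//     do { Value >>= 7; ++Size; } while (Value); // one byte per 7 bits
+//     return Size;
+//   }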
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (MO.isGlobal()) {
+ if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+ }
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ unsigned LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+
+ continue;
+ }
+
+ unsigned BeginLabel = MI->getOperand(0).getImm();
+ assert(BeginLabel && "Invalid label!");
+
+ // End of the previous try-range?
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+      // This is for Dwarf exception handling (SjLj doesn't use it): if some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (LandingPad->LandingPadLabel) {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+        // Try to merge with the previous call-site entry. SjLj doesn't do this.
+ if (PreviousIsInvoke &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ } else {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+///     handles the exception once processed. Finally, there is an index into
+///     the actions table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+///     catches in the function. This table is reverse indexed base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions = ComputeActionsTable(LandingPads, Actions,
+ FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ const unsigned SiteStartSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
+ bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+ unsigned SizeSites;
+
+ if (IsSJLJ)
+ SizeSites = 0;
+ else
+ SizeSites = CallSites.size() *
+ (SiteStartSize + SiteLengthSize + LandingPadSize);
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ SizeSites += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeFormat;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeFormat = dwarf::DW_EH_PE_omit;
+ TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr);
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+    //   3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ if (LSDASection->getKind().isWriteable() ||
+ Asm->TM.getRelocationModel() == Reloc::Static)
+ TTypeFormat = dwarf::DW_EH_PE_absptr;
+ else
+ TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+
+ TypeFormatSize = SizeOfEncodedValue(TTypeFormat);
+ }
+
+ // Begin the exception table.
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2, 0, 0, false);
+
+ O << "GCC_except_table" << SubprogramCount << ":\n";
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does, instead output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned TyOffset = sizeof(int8_t) + // Call site format
+ MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length
+ SizeSites + SizeActions + SizeTypes;
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ?
+ MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset
+ TyOffset;
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ Asm->EmitInt8(0);
+ EOL("Padding");
+ }
+
+ EmitLabel("exception", SubprogramCount);
+
+ if (IsSJLJ) {
+ SmallString<16> LSDAName;
+ raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ "_LSDA_" << Asm->getFunctionNumber();
+ O << LSDAName.str() << ":\n";
+ }
+
+ // Emit the header.
+ EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitEncodingByte(TTypeFormat, "@TType");
+
+ if (HaveTTData)
+ EmitULEB128(TyOffset, "@TType base offset");
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+ EmitULEB128(SizeSites, "Call site table length");
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ EmitULEB128(idx, "Landing pad");
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ EmitULEB128(S.Action, "Action");
+ }
+ } else {
+ // DWARF Exception handling
+ assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+    // The table begins with the number of bytes, stored as a LEB128-
+    // compressed unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+ EmitULEB128(SizeSites, "Call site table length");
+
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+ const char *BeginTag;
+ unsigned BeginNumber;
+
+ if (!S.BeginLabel) {
+ BeginTag = "eh_func_begin";
+ BeginNumber = SubprogramCount;
+ } else {
+ BeginTag = "label";
+ BeginNumber = S.BeginLabel;
+ }
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
+ true, true);
+ EOL("Region start");
+
+ if (!S.EndLabel)
+ EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
+ true);
+ else
+ EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+
+ EOL("Region length");
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel) {
+ Asm->OutStreamer.AddComment("Landing pad");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ } else {
+ EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+ true, true);
+ EOL("Landing pad");
+ }
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ EmitULEB128(S.Action, "Action");
+ }
+ }
+
+ // Emit the Action Table.
+ if (Actions.size() != 0) EOL("-- Action Record Table --");
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+ EOL("Action Record:");
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ EmitSLEB128(Action.ValueForTypeID, " TypeInfo index");
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ EmitSLEB128(Action.NextAction, " Next action");
+ }
+
+ // Emit the Catch TypeInfos.
+ if (TypeInfos.size() != 0) EOL("-- Catch TypeInfos --");
+ for (std::vector<GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ PrintRelDirective();
+
+ if (GV) {
+ O << *Asm->GetGlobalValueSymbol(GV);
+ EOL("TypeInfo");
+ } else {
+ O << "0x0";
+ EOL("");
+ }
+ }
+
+ // Emit the Exception Specifications.
+ if (FilterIds.size() != 0) EOL("-- Filter IDs --");
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+ }
+
+ Asm->EmitAlignment(2, 0, 0, false);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
+ return;
+
+ if (!shouldEmitMovesModule && !shouldEmitTableModule)
+ return;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+
+ for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+ EmitCIE(Personalities[I], I);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator
+ I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+ EmitFDE(*I);
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ if (!MMI || !MAI->doesSupportExceptionHandling()) return;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ this->MF = MF;
+ shouldEmitTable = shouldEmitMoves = false;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ // If any landing pads survive, we need an EH table.
+ if (!MMI->getLandingPads().empty())
+ shouldEmitTable = true;
+
+ // See if we need frame move info.
+ if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+ shouldEmitMoves = true;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ EmitLabel("eh_func_begin", ++SubprogramCount);
+
+ shouldEmitTableModule |= shouldEmitTable;
+ shouldEmitMovesModule |= shouldEmitMoves;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ EmitLabel("eh_func_end", SubprogramCount);
+ EmitExceptionTable();
+
+ MCSymbol *FunctionEHSym =
+ Asm->GetSymbolWithGlobalValueBase(MF->getFunction(), ".eh",
+ Asm->MAI->is_EHSymbolPrivate());
+
+ // Save EH frame information
+ EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym, SubprogramCount,
+ MMI->getPersonalityIndex(),
+ MF->getFrameInfo()->hasCalls(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ MF->getFunction()));
+
+ // Record if this personality index uses a landing pad.
+ UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty();
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 0000000..06033a1
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,208 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace llvm {
+
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MCAsmInfo;
+class MCExpr;
+class Timer;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException : public DwarfPrinter {
+ struct FunctionEHFrameInfo {
+ MCSymbol *FunctionEHSym; // L_foo.eh
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool hasCalls;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+ const Function *function;
+
+ FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M,
+ const Function *f):
+ FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+ hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+ /// uses an LSDA. If so, then we need to encode that information in the CIE's
+ /// augmentation.
+ DenseMap<unsigned, bool> UsesLSDA;
+
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+
+ /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+ /// should be emitted.
+ bool shouldEmitMovesModule;
+
+ /// ExceptionTimer - Timer for the Dwarf exception writer.
+ Timer *ExceptionTimer;
+
+ /// SizeOfEncodedValue - Return the size of the encoding in bytes.
+ unsigned SizeOfEncodedValue(unsigned Encoding);
+
+ /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+ /// that is shared among many Frame Description Entries. There is at least
+ /// one CIE in every non-empty .debug_frame section.
+ void EmitCIE(const Function *Personality, unsigned Index);
+
+ /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+ void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. The type id table contains references to all the C++ typeinfo for
+ /// all catches in the function. This table is reverse-indexed, starting
+ /// at base 1.
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ struct KeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ };
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+ };
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ unsigned BeginLabel; // zero indicates the start of the function.
+ unsigned EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ unsigned PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+ void EmitExceptionTable();
+
+ /// CreateLabelDiff - Emit a label and subtract it from the expression we
+ /// already have. This is equivalent to emitting "foo - .", but we have to
+ /// emit the label for "." directly.
+ const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
+ unsigned Index);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T);
+ virtual ~DwarfException();
+
+ /// BeginModule - Emit all exception information that should come prior to the
+ /// content.
+ void BeginModule(Module *m, MachineModuleInfo *mmi) {
+ this->M = m;
+ this->MMI = mmi;
+ }
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes it is
+ /// emitted immediately after the function entry point.
+ void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
new file mode 100644
index 0000000..6e9293a
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
@@ -0,0 +1,32 @@
+//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// Profile - Used to gather unique data for the folding set.
+///
+void DWLabel::Profile(FoldingSetNodeID &ID) const {
+ ID.AddString(Tag);
+ ID.AddInteger(Number);
+}
+
+#ifndef NDEBUG
+void DWLabel::print(raw_ostream &O) const {
+ O << "." << Tag;
+ if (Number) O << Number;
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
new file mode 100644
index 0000000..0c0cc4b
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h
@@ -0,0 +1,52 @@
+//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
+#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
+
+namespace llvm {
+ class FoldingSetNodeID;
+ class raw_ostream;
+
+ //===--------------------------------------------------------------------===//
+ /// DWLabel - Labels are used to track locations in the assembler file.
+ /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
+ /// where the tag is a category of label (Ex. location) and number is a value
+ /// unique in that category.
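+ /// For example, DWLabel("eh_func_begin", 2) prints as
+ /// "Leh_func_begin2" on a target whose private label prefix is "L".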
+ class DWLabel {
+ /// Tag - Label category tag. Should always be a statically declared C
+ /// string.
+ ///
+ const char *Tag;
+
+ /// Number - Value to make label unique.
+ ///
+ unsigned Number;
+ public:
+ DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+ // Accessors.
+ const char *getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+
+ /// Profile - Used to gather unique data for the folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) const;
+#endif
+ };
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
new file mode 100644
index 0000000..415390b
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -0,0 +1,341 @@
+//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
+ const char *flavor)
+: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()),
+ RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
+ SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
+
+void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const {
+ if (isInSection && MAI->getDwarfSectionOffsetDirective())
+ O << MAI->getDwarfSectionOffsetDirective();
+ else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t))
+ O << MAI->getData32bitsDirective();
+ else
+ O << MAI->getData64bitsDirective();
+}
+
+/// EOL - Print a newline character to asm stream. If a comment is present
+/// then it will be printed first. Comments should not contain '\n'.
+void DwarfPrinter::EOL(const Twine &Comment) const {
+ if (Asm->VerboseAsm && !Comment.isTriviallyEmpty()) {
+ Asm->O.PadToColumn(MAI->getCommentColumn());
+ Asm->O << Asm->MAI->getCommentString() << ' ' << Comment;
+ }
+ Asm->O << '\n';
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
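+/// For example, EmitEncodingByte(DW_EH_PE_pcrel | DW_EH_PE_sdata4, "LSDA")
+/// emits a 0x1b byte, annotated "LSDA Encoding = pcrel sdata4" when verbose
+/// assembly output is enabled.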
+void DwarfPrinter::EmitEncodingByte(unsigned Val, const char *Desc) {
+ if (Asm->VerboseAsm) {
+ if (Desc != 0)
+ Asm->OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ Asm->OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ Asm->OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void DwarfPrinter::EmitCFAByte(unsigned Val) {
+ if (Asm->VerboseAsm) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ Asm->OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ Asm->OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ Asm->OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (Asm->VerboseAsm && Desc)
+ Asm->OutStreamer.AddComment(Desc);
+
+ if (MAI->hasLEB128()) {
+ O << "\t.sleb128\t" << Value;
+ Asm->OutStreamer.AddBlankLine();
+ return;
+ }
+
+ // If we don't have .sleb128, emit as .bytes.
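+ // For example, -2 encodes as the single byte 0x7e, while 64 needs two
+ // bytes (0xc0 0x00) because bit 6 of a lone 0x40 byte would read back as
+ // a sign bit.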
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+
+ Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc) const {
+ if (Asm->VerboseAsm && Desc)
+ Asm->OutStreamer.AddComment(Desc);
+
+ if (MAI->hasLEB128()) {
+ O << "\t.uleb128\t" << Value;
+ Asm->OutStreamer.AddBlankLine();
+ return;
+ }
+
+ // If we don't have .uleb128, emit as .bytes.
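+ // For example, 624485 encodes as 0xe5 0x8e 0x26: low-order 7-bit groups
+ // first, with the high bit set on every byte except the last.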
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value) Byte |= 0x80;
+ Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (Value);
+}
+
+
+/// PrintLabelName - Print label name in form used by Dwarf writer.
+///
+void DwarfPrinter::PrintLabelName(const char *Tag, unsigned Number) const {
+ O << MAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+}
+void DwarfPrinter::PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const {
+ O << MAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+ O << Suffix;
+}
+
+/// EmitLabel - Emit location label for internal use by Dwarf.
+///
+void DwarfPrinter::EmitLabel(const char *Tag, unsigned Number) const {
+ PrintLabelName(Tag, Number);
+ O << ":\n";
+}
+
+/// EmitReference - Emit a reference to a label.
+///
+void DwarfPrinter::EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative, bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ PrintLabelName(Tag, Number);
+ if (IsPCRelative) O << "-" << MAI->getPCSymbol();
+}
+void DwarfPrinter::EmitReference(const std::string &Name, bool IsPCRelative,
+ bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ O << Name;
+ if (IsPCRelative) O << "-" << MAI->getPCSymbol();
+}
+
+void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative,
+ bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ O << *Sym;
+ if (IsPCRelative) O << "-" << MAI->getPCSymbol();
+}
+
+/// EmitDifference - Emit the difference between two labels. If this assembler
+/// supports .set, we emit a .set of a temporary and then use it in the .word.
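+/// For example, with Flavor "eh" and a private label prefix of "L", the
+/// first call typically emits:
+///   .set Lset1eh, Leh_func_end1-Leh_func_begin1
+///   .long Lset1eh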
+void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall) {
+ if (MAI->hasSetDirective()) {
+ // FIXME: switch to OutStreamer.EmitAssignment.
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ O << "\n";
+
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ } else {
+ PrintRelDirective(IsSmall);
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ }
+}
+
+void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber,
+ unsigned SectionNumber,
+ bool IsSmall, bool isEH,
+ bool useSet) {
+ bool printAbsolute = false;
+ if (isEH)
+ printAbsolute = MAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = MAI->isAbsoluteDebugSectionOffsets();
+
+ if (MAI->hasSetDirective() && useSet) {
+ // FIXME: switch to OutStreamer.EmitAssignment.
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+
+ O << "\n";
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ O << "\n";
+ } else {
+ PrintRelDirective(IsSmall, true);
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ O << "\n";
+ }
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void DwarfPrinter::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves,
+ bool isEH) {
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
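+ // On a downward-growing stack (e.g. x86), stackGrowth is -(pointer size),
+ // so dividing a negative frame offset by it below yields a positive slot
+ // count.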
+ bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ EmitCFAByte(dwarf::DW_CFA_advance_loc4);
+ EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+ Asm->O << '\n';
+
+ BaseLabelID = LabelID;
+ BaseLabel = "label";
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ EmitCFAByte(dwarf::DW_CFA_def_cfa_offset);
+ } else {
+ EmitCFAByte(dwarf::DW_CFA_def_cfa);
+ EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register");
+ }
+
+ int Offset = -Src.getOffset();
+ EmitULEB128(Offset, "Offset");
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ EmitCFAByte(dwarf::DW_CFA_def_cfa_register);
+ EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register");
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ EmitCFAByte(dwarf::DW_CFA_offset_extended_sf);
+ EmitULEB128(Reg, "Reg");
+ EmitSLEB128(Offset, "Offset");
+ } else if (Reg < 64) {
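+ // DW_CFA_offset encodes the register in the low 6 bits of the opcode
+ // byte itself, so it can only name registers 0-63.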
+ EmitCFAByte(dwarf::DW_CFA_offset + Reg);
+ EmitULEB128(Offset, "Offset");
+ } else {
+ EmitCFAByte(dwarf::DW_CFA_offset_extended);
+ EmitULEB128(Reg, "Reg");
+ EmitULEB128(Offset, "Offset");
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
new file mode 100644
index 0000000..69d9c27
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -0,0 +1,167 @@
+//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+
+#include "DwarfLabel.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FormattedStream.h"
+#include <vector>
+
+namespace llvm {
+class AsmPrinter;
+class MachineFunction;
+class MachineModuleInfo;
+class Module;
+class MCAsmInfo;
+class TargetData;
+class TargetRegisterInfo;
+class MCSymbol;
+class Twine;
+
+class DwarfPrinter {
+protected:
+ ~DwarfPrinter() {}
+
+ //===-------------------------------------------------------------==---===//
+ // Core attributes used by the DWARF printer.
+ //
+
+ /// O - Stream to .s file.
+ raw_ostream &O;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MAI - Target asm information.
+ const MCAsmInfo *MAI;
+
+ /// TD - Target data.
+ const TargetData *TD;
+
+ /// RI - Register Information.
+ const TargetRegisterInfo *RI;
+
+ /// M - Current module.
+ Module *M;
+
+ /// MF - Current machine function.
+ const MachineFunction *MF;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// SubprogramCount - The running count of functions being compiled.
+ unsigned SubprogramCount;
+
+ /// Flavor - A unique string indicating which dwarf producer this is; it is
+ /// used to make labels unique.
+ const char * const Flavor;
+
+ /// SetCounter - A unique number for each '.set' directive.
+ unsigned SetCounter;
+
+ DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
+ const char *flavor);
+public:
+
+ //===------------------------------------------------------------------===//
+ // Accessors.
+ //
+ const AsmPrinter *getAsm() const { return Asm; }
+ MachineModuleInfo *getMMI() const { return MMI; }
+ const MCAsmInfo *getMCAsmInfo() const { return MAI; }
+ const TargetData *getTargetData() const { return TD; }
+
+ void PrintRelDirective(bool Force32Bit = false,
+ bool isInSection = false) const;
+
+ /// EOL - Print a newline character to asm stream. If a comment is present
+ /// then it will be printed first. Comments should not contain '\n'.
+ void EOL(const Twine &Comment) const;
+
+ /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+ /// encoding. If verbose assembly output is enabled, we output comments
+ /// describing the encoding. Desc is a string saying what the encoding is
+ /// specifying (e.g. "LSDA").
+ void EmitEncodingByte(unsigned Val, const char *Desc);
+
+ /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+ void EmitCFAByte(unsigned Val);
+
+
+ /// EmitSLEB128 - emit the specified signed leb128 value.
+ void EmitSLEB128(int Value, const char *Desc) const;
+
+ /// EmitULEB128 - emit the specified unsigned leb128 value.
+ void EmitULEB128(unsigned Value, const char *Desc = 0) const;
+
+
+ /// PrintLabelName - Print label name in form used by Dwarf writer.
+ ///
+ void PrintLabelName(const DWLabel &Label) const {
+ PrintLabelName(Label.getTag(), Label.getNumber());
+ }
+ void PrintLabelName(const char *Tag, unsigned Number) const;
+ void PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const;
+
+ /// EmitLabel - Emit location label for internal use by Dwarf.
+ ///
+ void EmitLabel(const DWLabel &Label) const {
+ EmitLabel(Label.getTag(), Label.getNumber());
+ }
+ void EmitLabel(const char *Tag, unsigned Number) const;
+
+ /// EmitReference - Emit a reference to a label.
+ ///
+ void EmitReference(const DWLabel &Label, bool IsPCRelative = false,
+ bool Force32Bit = false) const {
+ EmitReference(Label.getTag(), Label.getNumber(),
+ IsPCRelative, Force32Bit);
+ }
+ void EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+ void EmitReference(const std::string &Name, bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+ void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+
+ /// EmitDifference - Emit the difference between two labels.
+ void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
+ bool IsSmall = false) {
+ EmitDifference(LabelHi.getTag(), LabelHi.getNumber(),
+ LabelLo.getTag(), LabelLo.getNumber(),
+ IsSmall);
+ }
+ void EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall = false);
+
+ void EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall = false, bool isEH = false,
+ bool useSet = true);
+
+ /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+ /// frame.
+ void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves, bool isEH);
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
new file mode 100644
index 0000000..08e1bbc
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -0,0 +1,100 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+static RegisterPass<DwarfWriter>
+X("dwarfwriter", "DWARF Information Writer");
+char DwarfWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter()
+ : ImmutablePass(&ID), DD(0), DE(0) {}
+
+DwarfWriter::~DwarfWriter() {
+ delete DE;
+ delete DD;
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M,
+ MachineModuleInfo *MMI,
+ raw_ostream &OS, AsmPrinter *A,
+ const MCAsmInfo *T) {
+ DE = new DwarfException(OS, A, T);
+ DD = new DwarfDebug(OS, A, T);
+ DE->BeginModule(M, MMI);
+ DD->beginModule(M, MMI);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+ DE->EndModule();
+ DD->endModule();
+ delete DD; DD = 0;
+ delete DE; DE = 0;
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes it is
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(const MachineFunction *MF) {
+ DE->BeginFunction(MF);
+ DD->beginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction(const MachineFunction *MF) {
+ DD->endFunction(MF);
+ DE->EndFunction();
+
+ if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
+ // Clear function debug information.
+ MMI->EndFunction();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
+ MDNode *Scope) {
+ return DD->recordSourceLine(Line, Col, Scope);
+}
+
+/// getRecordSourceLineCount - Count source lines.
+unsigned DwarfWriter::getRecordSourceLineCount() {
+ return DD->getSourceLineCount();
+}
+
+/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+/// be emitted.
+bool DwarfWriter::ShouldEmitDwarfDebug() const {
+ return DD && DD->ShouldEmitDwarfDebug();
+}
+
+void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
+ DD->beginScope(MI, L);
+}
+void DwarfWriter::EndScope(const MachineInstr *MI) {
+ DD->endScope(MI);
+}
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 0000000..60aa6cb
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..3531ed6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,160 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI);
+
+ void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
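+// EmitCamlGlobal - Emit a global symbol of the form caml<Module>__<Id>. For
+// example, a module identifier of "mymod.bc" with Id "frametable" yields
+// "camlMymod__frametable" (after the target's global prefix).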
+static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string Mangled;
+ Mangled += MAI.getGlobalPrefix();
+ Mangled += "caml";
+ size_t Letter = Mangled.size();
+ Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ Mangled += "__";
+ Mangled += Id;
+
+ // Capitalize the first letter of the module name.
+ Mangled[Letter] = toupper(Mangled[Letter]);
+
+ if (const char *GlobalDirective = MAI.getGlobalDirective())
+ OS << GlobalDirective << Mangled << "\n";
+ OS << Mangled << ":\n";
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// or more than 64K live roots (FrameSize and LiveOffsets would overflow).
+/// The frametable printer aborts if either condition is detected in a
+/// function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI) {
+ const char *AddressDirective;
+ int AddressAlignLog;
+ if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+ AddressDirective = MAI.getData32bitsDirective();
+ AddressAlignLog = 2;
+ } else {
+ AddressDirective = MAI.getData64bitsDirective();
+ AddressAlignLog = 3;
+ }
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end");
+
+ OS << AddressDirective << 0 << '\n'; // FIXME: Why does ocaml emit this??
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable");
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Function '" << FI.getFunction().getName()
+ << "' is too large for the ocaml GC! "
+ << "Frame size " << FrameSize << " >= 65536.\n";
+ Msg << "(" << uintptr_t(&FI) << ")";
+ llvm_report_error(Msg.str()); // Very rude!
+ }
+
+ OS << "\t" << MAI.getCommentString() << " live roots for "
+ << FI.getFunction().getName() << "\n";
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Function '" << FI.getFunction().getName()
+ << "' is too large for the ocaml GC! "
+ << "Live root count " << LiveCount << " >= 65536.";
+ llvm_report_error(Msg.str()); // Very rude!
+ }
+
+ OS << AddressDirective
+ << MAI.getPrivateGlobalPrefix() << "label" << J->Num << '\n';
+
+ AP.EmitInt16(FrameSize);
+
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ assert(K->StackOffset < 1<<16 &&
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+
+ AP.EmitInt32(K->StackOffset);
+ }
+
+ AP.EmitAlignment(AddressAlignLog);
+ }
+ }
+}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..faf4d95
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1295 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+ return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ return OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ ImpDefRegs.insert(SubReg);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging, and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = false;
+ MadeChangeThisIteration |= TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become mergable or dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI == 0) {
+ delete RS;
+ return MadeChange;
+ }
+
+ const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+ // Figure out how these jump tables should be merged.
+ std::vector<unsigned> JTMapping;
+ JTMapping.reserve(JTs.size());
+
+ // We always keep the 0th jump table.
+ JTMapping.push_back(0);
+
+ // Scan the jump tables, seeing if there are any duplicates. Note that this
+ // is N^2, which should be fixed someday.
+ for (unsigned i = 1, e = JTs.size(); i != e; ++i) {
+ if (JTs[i].MBBs.empty())
+ JTMapping.push_back(i);
+ else
+ JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+ }
+
+ // If a jump table was merged with another one, walk the function rewriting
+ // references to jump tables to use the new JT IDs. Keep track of whether we
+ // see each jump table index; any JT we never see is dead and can be deleted.
+ BitVector JTIsLive(JTs.size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+ unsigned NewIdx = JTMapping[Op.getIndex()];
+ Op.setIndex(NewIdx);
+
+ // Remember that this JT is live.
+ JTIsLive.set(NewIdx);
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens either because the
+ // indirect jump was unreachable (and thus deleted) or because the jump
+ // table was merged with some other one.
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ delete RS;
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
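+ // Fold the operand's type tag into its hash and shift by the operand
+ // index (mod 32) so identical operands at different positions contribute
+ // differently.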
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted). For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// cross-jumping pays off even for a single instruction.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+ unsigned minCommonTailLength) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ unsigned Hash = HashMachineInstr(I);
+
+ if (I == MBB->begin() || minCommonTailLength == 1)
+ return Hash; // Single instr MBB.
+
+ --I;
+ // Hash in the second-to-last instruction.
+ Hash ^= HashMachineInstr(I) << 2;
+ return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ return TailLen;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing the tail with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *OldBB = OldInst->getParent();
+
+ // Remove all the old successors of OldBB from the CFG.
+ while (!OldBB->succ_empty())
+ OldBB->removeSuccessor(OldBB->succ_begin());
+
+ // Remove all the dead instructions from the end of OldBB.
+ OldBB->erase(OldInst, OldBB->end());
+
+ // If OldBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+ TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
+ OldBB->addSuccessor(NewDest);
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the part
+/// starting at the iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ if (RS) {
+ RS->enterBasicBlock(&CurMBB);
+ if (!CurMBB.empty())
+ RS->forward(prior(CurMBB.end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall())
+ Time += 10;
+ else if (TID.mayLoad() || TID.mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccMBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#endif
+ return false;
+ }
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return true
+/// if so, setting CommonTailLen to the length of the common tail and I1 and
+/// I2 to the first common instruction in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ MachineFunction *MF = MBB1->getParent();
+
+ if (CommonTailLen == 0)
+ return false;
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
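+/// For example (illustrative): if blocks A, B and C all hash to CurHash and,
+/// pairwise, A and B profitably share a 5-instruction tail while C matches
+/// only 3 of those instructions, SameTails gets entries for A and B and the
+/// returned maxCommonTailLength is 5.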
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength) {
+ unsigned commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return commonTailIndex;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i = 0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ bool MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I));
+ }
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() < TailMergeThreshold &&
+ MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) altering conditional branches so they branch to the other block,
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2; ++P) {
+ MachineBasicBlock *PBB = *P;
+ // Skip blocks that loop to themselves; we can't tail-merge these.
+ if (PBB == IBB)
+ continue;
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
+ }
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U),
+ *P));
+ }
+ }
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (MBB1->empty() || MBB2->empty()) return false;
+
+ // If there is a clear successor ordering, make sure that one block
+ // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ MachineInstr *MBB1I = --MBB1->end();
+ MachineInstr *MBB2I = --MBB2->end();
+ return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps asserts in loops and
+ // similar constructs by moving the assert-failure block out of the loop body.
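+ // For example (illustrative):
+ //   PrevBB: ...; Bcc X               PrevBB: ...; Bncc MBB
+ //   MBB:    call abort         =>    X:      ...
+ //   X:      ...                      (MBB now at the end of the function)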
+ if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither has successors and PredBB is the second-from-last
+ // block in the function, we'd just keep swapping which of the two blocks
+ // is last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ // We don't want to do this transformation if we have control flow like:
+ // br cond BB2
+ // BB1:
+ // ..
+ // jmp BBX
+ // BB2:
+ // ..
+ // ret
+ //
+ // In this case, we could actually be moving the return block *into* a
+ // loop!
+ if (DoTransform && !MBB->succ_empty() &&
+ (!PriorTBB->canFallThrough() || PriorTBB->empty()))
+ DoTransform = false;
+
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for this block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while (PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
new file mode 100644
index 0000000..b087395
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,115 @@
+//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+ template<typename T> class SmallVectorImpl;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 0000000..9fcbea9
--- /dev/null
+++ b/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,75 @@
+add_llvm_library(LLVMCodeGen
+ AggressiveAntiDepBreaker.cpp
+ BranchFolding.cpp
+ CalcSpillWeights.cpp
+ CodePlacementOpt.cpp
+ CriticalAntiDepBreaker.cpp
+ DeadMachineInstructionElim.cpp
+ DwarfEHPrepare.cpp
+ ELFCodeEmitter.cpp
+ ELFWriter.cpp
+ ExactHazardRecognizer.cpp
+ GCMetadata.cpp
+ GCMetadataPrinter.cpp
+ GCStrategy.cpp
+ IfConversion.cpp
+ IntrinsicLowering.cpp
+ LLVMTargetMachine.cpp
+ LatencyPriorityQueue.cpp
+ LiveInterval.cpp
+ LiveIntervalAnalysis.cpp
+ LiveStackAnalysis.cpp
+ LiveVariables.cpp
+ LowerSubregs.cpp
+ MachineBasicBlock.cpp
+ MachineDominators.cpp
+ MachineFunction.cpp
+ MachineFunctionAnalysis.cpp
+ MachineFunctionPass.cpp
+ MachineInstr.cpp
+ MachineLICM.cpp
+ MachineLoopInfo.cpp
+ MachineModuleInfo.cpp
+ MachineModuleInfoImpls.cpp
+ MachinePassRegistry.cpp
+ MachineRegisterInfo.cpp
+ MachineSSAUpdater.cpp
+ MachineSink.cpp
+ MachineVerifier.cpp
+ ObjectCodeEmitter.cpp
+ OcamlGC.cpp
+ OptimizeExts.cpp
+ PHIElimination.cpp
+ Passes.cpp
+ PostRASchedulerList.cpp
+ PreAllocSplitting.cpp
+ ProcessImplicitDefs.cpp
+ PrologEpilogInserter.cpp
+ PseudoSourceValue.cpp
+ RegAllocLinearScan.cpp
+ RegAllocLocal.cpp
+ RegAllocPBQP.cpp
+ RegisterCoalescer.cpp
+ RegisterScavenging.cpp
+ ScheduleDAG.cpp
+ ScheduleDAGEmit.cpp
+ ScheduleDAGInstrs.cpp
+ ScheduleDAGPrinter.cpp
+ ShadowStackGC.cpp
+ ShrinkWrapping.cpp
+ SimpleRegisterCoalescing.cpp
+ SjLjEHPrepare.cpp
+ SlotIndexes.cpp
+ Spiller.cpp
+ StackProtector.cpp
+ StackSlotColoring.cpp
+ StrongPHIElimination.cpp
+ TailDuplication.cpp
+ TargetInstrInfoImpl.cpp
+ TwoAddressInstructionPass.cpp
+ UnreachableBlockElim.cpp
+ VirtRegMap.cpp
+ VirtRegRewriter.cpp
+ )
+
+target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 0000000..2bedd04
--- /dev/null
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,151 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+static RegisterPass<CalculateSpillWeights> X("calcspillweights",
+ "Calculate spill weights");
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: "
+ << fn.getFunction()->getName() << '\n');
+
+ LiveIntervals *lis = &getAnalysis<LiveIntervals>();
+ MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
+ const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
+ MachineRegisterInfo *mri = &fn.getRegInfo();
+
+ SmallSet<unsigned, 4> processed;
+ for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
+ MachineLoop* loop = loopInfo->getLoopFor(mbb);
+ unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
+ bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
+
+ for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ++mii) {
+ const MachineInstr *mi = mii;
+ if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mopi = mi->getOperand(i);
+ if (!mopi.isReg() || mopi.getReg() == 0)
+ continue;
+ unsigned reg = mopi.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
+ continue;
+ // Multiple uses of reg by the same instruction should not
+ // contribute to the spill weight again.
+ if (!processed.insert(reg))
+ continue;
+
+ bool hasDef = mopi.isDef();
+ bool hasUse = !hasDef;
+ for (unsigned j = i+1; j != e; ++j) {
+ const MachineOperand &mopj = mi->getOperand(j);
+ if (!mopj.isReg() || mopj.getReg() != reg)
+ continue;
+ hasDef |= mopj.isDef();
+ hasUse |= mopj.isUse();
+ if (hasDef && hasUse)
+ break;
+ }
+
+ LiveInterval ®Int = lis->getInterval(reg);
+ float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
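+ // At the time of this commit, getSpillWeight scales roughly as
+ // (hasDef + hasUse) * 10^loopDepth (illustrative; see
+ // LiveIntervals::getSpillWeight for the exact formula), so uses inside
+ // deeply nested loops dominate the final weight.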
+ if (hasDef && isExiting) {
+ // Looks like this is a loop count variable update.
+ SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
+ const LiveRange *dlr =
+ lis->getInterval(reg).getLiveRangeContaining(defIdx);
+ if (dlr->end >= mbbEnd)
+ weight *= 3.0F;
+ }
+ regInt.weight += weight;
+ }
+ processed.clear();
+ }
+ }
+
+ for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&li)) {
+ li.weight = HUGE_VALF;
+ continue;
+ }
+
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> spillIs;
+ if (lis->isReMaterializable(li, spillIs, isLoad)) {
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isLoad)
+ li.weight *= 0.9F;
+ else
+ li.weight *= 0.5F;
+ }
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
+ if (Hint.first || Hint.second)
+ li.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM;
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the given live interval is zero length.
+bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
+ for (LiveInterval::Ranges::const_iterator
+ i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+ if (i->end.getPrevIndex() > i->start)
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 0000000..05a57d4
--- /dev/null
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,420 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that optimizes code placement and aligns
+// loop headers to the target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoopsAligned, "Number of loops aligned");
+STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
+
+namespace {
+ class CodePlacementOpt : public MachineFunctionPass {
+ const MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+
+ public:
+ static char ID;
+ CodePlacementOpt() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const {
+ return "Code Placement Optimizater";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool HasFallthrough(MachineBasicBlock *MBB);
+ bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
+ void Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End);
+ bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L);
+ bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L);
+ bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
+ bool OptimizeIntraLoopEdges(MachineFunction &MF);
+ bool AlignLoops(MachineFunction &MF);
+ bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
+ };
+
+ char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+ return new CodePlacementOpt();
+}
+
+/// HasFallthrough - Test whether the given block has a fallthrough, either as
+/// a plain fallthrough or as a fallthrough case of a conditional branch.
+///
+bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // This conditional branch has no fallthrough.
+ if (FBB)
+ return false;
+ // An unconditional branch has no fallthrough.
+ if (Cond.empty() && TBB)
+ return false;
+ // It has a fallthrough.
+ return true;
+}
+
+/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
+/// This is called before major changes are begun to test whether it will be
+/// possible to complete the changes.
+///
+/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
+/// whenever possible.
+///
+bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
+ // Conservatively ignore EH landing pads.
+ if (MBB->isLandingPad()) return false;
+
+ // Ignore blocks which look like they might have EH-related control flow.
+ // At the time of this writing, there are blocks which AnalyzeBranch
+// thinks end in single unconditional branches, yet which have two CFG
+ // successors. Code in this file is not prepared to reason about such things.
+ if (!MBB->empty() && MBB->back().isEHLabel())
+ return false;
+
+ // Aggressively handle return blocks and similar constructs.
+ if (MBB->succ_empty()) return true;
+
+ // Ask the target's AnalyzeBranch if it can handle this block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ // Make sure the terminator is understood.
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // Make sure we have the option of reversing the condition.
+ if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
+ return false;
+ return true;
+}
+
+/// Splice - Move the sequence of instructions [Begin,End) to just before
+/// InsertPt. Update branch instructions as needed to account for broken
+/// fallthrough edges and to take advantage of newly exposed fallthrough
+/// opportunities.
+///
+void CodePlacementOpt::Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End) {
+ assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
+ "Splice can't change the entry block!");
+ MachineFunction::iterator OldBeginPrior = prior(Begin);
+ MachineFunction::iterator OldEndPrior = prior(End);
+
+ MF.splice(InsertPt, Begin, End);
+
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
+}
+
+/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
+/// to the loop top to the top of the loop so that they have a fall through.
+/// This can introduce a branch on entry to the loop, but it can eliminate a
+/// branch within the loop. See the @simple case in
+/// test/CodeGen/X86/loop_blocks.ll for an example of this.
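+/// For example (illustrative): given the layout
+///   top: ...;  mid: ...;  latch: ...; B top
+/// moving latch (and any blocks reaching it only via fallthrough) above top
+/// turns the "B top" into a fallthrough, possibly at the cost of an extra
+/// branch on entry to the loop.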
+bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+
+ bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
+
+ if (TopMBB == MF.begin() ||
+ HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
+ new_top:
+ for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
+ PE = TopMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (Pred == TopMBB) continue;
+ if (HasFallthrough(Pred)) continue;
+ if (!L->contains(Pred)) continue;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (Pred == MF.begin())
+ continue;
+ if (!HasAnalyzableTerminator(Pred))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
+ continue;
+
+ // Move the block.
+ Changed = true;
+
+ // Move it and all the blocks that can reach it via fallthrough edges
+ // exclusively, to keep existing fallthrough edges intact.
+ MachineFunction::iterator Begin = Pred;
+ MachineFunction::iterator End = llvm::next(Begin);
+ while (Begin != MF.begin()) {
+ MachineFunction::iterator Prior = prior(Begin);
+ if (Prior == MF.begin())
+ break;
+ // Stop when a non-fallthrough edge is found.
+ if (!HasFallthrough(Prior))
+ break;
+ // Stop if a block which could fall-through out of the loop is found.
+ if (Prior->isSuccessor(End))
+ break;
+ // If we've reached the top, stop scanning.
+ if (Prior == MachineFunction::iterator(TopMBB)) {
+ // We know top currently has a fall through (because we just checked
+ // it) which would be lost if we do the transformation, so it isn't
+ // worthwhile to do the transformation unless it would expose a new
+ // fallthrough edge.
+ if (!Prior->isSuccessor(End))
+ goto next_pred;
+ // Otherwise we can stop scanning and proceed to move the blocks.
+ break;
+ }
+ // If we hit a switch or something complicated, don't move anything
+ // for this predecessor.
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
+ break;
+ // Ok, the block prior to Begin will be moved along with the rest.
+ // Extend the range to include it.
+ Begin = Prior;
+ ++NumIntraMoved;
+ }
+
+ // Move the blocks.
+ Splice(MF, TopMBB, Begin, End);
+
+ // Update TopMBB.
+ TopMBB = L->getTopBlock();
+
+ // We have a new loop top. Iterate on it. We shouldn't have to do this
+ // too many times if BranchFolding has done a reasonable job.
+ goto new_top;
+ next_pred:;
+ }
+ }
+
+ // If the loop previously didn't exit with a fall-through and it now does,
+ // we eliminated a branch.
+ if (Changed &&
+ !BotHasFallthrough &&
+ HasFallthrough(L->getBottomBlock())) {
+ ++NumIntraElim;
+ }
+
+ return Changed;
+}
+
+/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
+/// portion of the loop contiguous with the header. This usually makes the loop
+/// contiguous, provided that AnalyzeBranch can handle all the relevant
+/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
+/// for an example of this.
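+/// For example (illustrative): with blocks laid out as
+///   header, body1, <non-loop block>, body2
+/// body2 is spliced next to the contiguous portion so the loop occupies a
+/// single contiguous range of blocks.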
+bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock *BotMBB = L->getBottomBlock();
+
+ // Determine a position to move orphaned loop blocks to. If TopMBB is not
+ // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
+// to the top of the loop to avoid losing that fallthrough. Otherwise append
+ // them to the bottom, even if it previously had a fallthrough, on the theory
+ // that it's worth an extra branch to keep the loop contiguous.
+ MachineFunction::iterator InsertPt =
+ llvm::next(MachineFunction::iterator(BotMBB));
+ bool InsertAtTop = false;
+ if (TopMBB != MF.begin() &&
+ !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
+ HasFallthrough(BotMBB)) {
+ InsertPt = TopMBB;
+ InsertAtTop = true;
+ }
+
+ // Keep a record of which blocks are in the portion of the loop contiguous
+ // with the loop header.
+ SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
+ for (MachineFunction::iterator I = TopMBB,
+ E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+ ContiguousBlocks.insert(I);
+
+ // Find non-contiguous blocks and fix them.
+ if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
+ for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ MachineBasicBlock *BB = *BI;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (!HasAnalyzableTerminator(BB))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // If the layout predecessor is part of the loop, this block will be
+ // processed along with it. This keeps them in their relative order.
+ if (BB != MF.begin() &&
+ L->contains(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // Check to see if this block is already contiguous with the main
+ // portion of the loop.
+ if (!ContiguousBlocks.insert(BB))
+ continue;
+
+ // Move the block.
+ Changed = true;
+
+ // Process this block and all loop blocks contiguous with it, to keep
+ // them in their relative order.
+ MachineFunction::iterator Begin = BB;
+ MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
+ for (; End != MF.end(); ++End) {
+ if (!L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ ContiguousBlocks.insert(End);
+ ++NumIntraMoved;
+ }
+
+ // If we're inserting at the bottom of the loop, and the code we're
+ // moving originally had fall-through successors, bring the successors
+ // up with the loop blocks to preserve the fall-through edges.
+ if (!InsertAtTop)
+ for (; End != MF.end(); ++End) {
+ if (L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ if (!HasFallthrough(prior(End))) break;
+ }
+
+ // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
+ // we don't need them anymore at this point.
+ Splice(MF, InsertPt, Begin, End);
+ }
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+/// This code takes the approach of making minor changes to the existing
+/// layout to fix specific loop-oriented problems. Also, it depends on
+/// AnalyzeBranch, which can't understand complex control instructions.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Do optimization for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ // Do optimization for this loop.
+ Changed |= EliminateUnconditionalJumpsToTop(MF, L);
+ Changed |= MoveDiscontiguousLoopBlocks(MF, L);
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
+ bool Changed = false;
+
+ if (!TLI->shouldOptimizeCodePlacement())
+ return Changed;
+
+ // Do optimization for each loop in the function.
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ if (!(*I)->getParentLoop())
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ return Changed;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return false; // Don't care about loop alignment.
+
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ return Changed;
+}
+
+/// AlignLoop - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+ unsigned Align) {
+ bool Changed = false;
+
+ // Do alignment for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+ Changed = true;
+ ++NumLoopsAligned;
+
+ return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+ MLI = &getAnalysis<MachineLoopInfo>();
+ if (MLI->empty())
+ return false; // No loops.
+
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Changed = OptimizeIntraLoopEdges(MF);
+
+ Changed |= AlignLoops(MF);
+
+ return Changed;
+}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 0000000..056e2d5
--- /dev/null
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,548 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ AllocatableSet(TRI->getAllocatableSet(MF))
+{
+}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ // Clear out the register class data.
+ std::fill(Classes, array_endof(Classes),
+ static_cast<const TargetRegisterClass *>(0));
+
+ // Initialize the indices to indicate that no registers are live.
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.clear();
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
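+ // Note: a Classes[] entry of -1 (cast to a pointer) marks a register
+ // that is live but must not be renamed; it is used as such throughout
+ // this file.
+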
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ } else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.clear();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
+ if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+ // Mark this register to be non-renamable.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements.
+ if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (KeepRegs.insert(Reg)) {
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ KeepRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are def'd but not used in this
+ // instruction are now dead.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // If the register wasn't previously live, this use is its kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC)
+{
+ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+ RE = RC->allocation_order_end(MF); R != RE; ++R) {
+ unsigned NewReg = *R;
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If the instruction already has a def of the NewReg, it's not suitable.
+ // For example, an instruction with multiple definitions can result in this
+ // condition.
+ if (MI->modifiesRegister(NewReg, TRI)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Find the node at the bottom of the critical path.
+ SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!AllocatableSet.test(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.count(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
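+ // Reading the predicate below: an edge to NextSU disqualifies
+ // AntiDepReg unless it is the anti-dependence on AntiDepReg itself;
+ // an edge to any other SUnit disqualifies it if it is a data
+ // dependence on the same register.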
+ for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ if (MI->getDesc().hasExtraDefRegAllocReq())
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && AntiDepReg == Reg) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q->second->setReg(NewReg);
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 0000000..9e8db02
--- /dev/null
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,98 @@
+//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetRegisterInfo *TRI;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ const TargetRegisterClass *
+ Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ SmallSet<unsigned, 4> KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi);
+ ~CriticalAntiDepBreaker();
+
+ /// StartBlock - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// FinishBlock - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ unsigned findSuitableFreeRegister(MachineInstr *MI,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *);
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..b0cb24d
--- /dev/null
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,179 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+static RegisterPass<DeadMachineInstructionElim>
+Y("dead-mi-elimination",
+ "Remove dead machine instructions");
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+ return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, SawStore, 0))
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+ LivePhysRegs[Reg] : !MRI->use_empty(Reg)) {
+ // This def has a use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Compute a bitvector to represent all non-allocatable physregs.
+ BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
+ NonAllocatableRegs.flip();
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
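+ // Illustrative sketch (hypothetical vregs): given
+ //   %v1 = ...          ; only used by the instruction below
+ //   %v2 = add %v1, 1   ; %v2 has no uses
+ // visiting bottom-up deletes the add first, leaving %v1 unused so its
+ // def can be deleted in the same walk.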
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that all non-allocatable registers are live
+ // out of this block.
+ LivePhysRegs = NonAllocatableRegs;
+
+ // Also add any explicit live-out physregs for this block.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+ LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+ unsigned Reg = *LOI;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ LivePhysRegs.set(Reg);
+ }
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isDebugValue()) {
+ // Don't delete the DBG_VALUE itself, but if its Value operand is
+ // a vreg and this is the only use, substitute an undef operand;
+ // the vreg's defining instruction will then be deleted normally.
+ if (MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
+ assert(I != MRI->use_end());
+ if (++I == MRI->use_end())
+ // only one use, which must be this DBG_VALUE.
+ MI->getOperand(0).setReg(0U);
+ }
+ }
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ AnyChanges = true;
+ MI->eraseFromParent();
+ ++NumDeletes;
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs)
+ LivePhysRegs.reset(*SubRegs);
+ }
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.set(Reg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ *AliasSet; ++AliasSet)
+ LivePhysRegs.set(*AliasSet);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 0000000..39fc85e
--- /dev/null
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,428 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using DWARF exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetLowering *TLI;
+ bool CompileFast;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // Dominator info is used when turning stack temporaries into registers.
+ DominatorTree *DT;
+ DominanceFrontier *DF;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ // Stack temporary used to hold eh.exception values.
+ AllocaInst *ExceptionValueVar;
+
+ bool NormalizeLandingPads();
+ bool LowerUnwinds();
+ bool MoveExceptionValueCalls();
+ bool FinishStackTemporaries();
+ bool PromoteStackTemporaries();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+ Instruction *CreateValueLoad(BasicBlock *BB);
+
+ /// CreateReadOfExceptionValue - Return the result of the eh.exception
+ /// intrinsic by calling the intrinsic if in a landing pad, or loading
+ /// it from the exception value variable otherwise.
+ Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
+ return LandingPads.count(BB) ?
+ CreateExceptionValueCall(BB) : CreateValueLoad(BB);
+ }
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetLowering *tli, bool fast) :
+ FunctionPass(&ID), TLI(tli), CompileFast(fast),
+ ExceptionValueIntrinsic(0), RewindFunction(0) {}
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need dominance frontiers for memory promotion.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (!CompileFast)
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ if (!CompileFast)
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
+ return new DwarfEHPrepare(tli, fast);
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
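+/// For example, sketching the CFG rewrite:
+///
+///   before:  invoke --unwind--> LPad <--branch-- BB
+///   after:   invoke --unwind--> NewBB --fallthrough--> LPad <--branch-- BB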
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo();
+ bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ bool SwitchOK = usingSjLjEH;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ // The SjLj dispatch block uses a switch instruction. This is effectively
+ // an unwind edge, so we can disregard it here. There will only ever
+ // be one dispatch, however, so if there are multiple switches, one
+ // of them truly is a normal edge, not an unwind edge.
+ if (SwitchOK && isa<SwitchInst>(PT)) {
+ SwitchOK = false;
+ continue;
+ }
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+ LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
+ NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree and DominanceFrontier analysis information.
+ if (DT)
+ DT->splitBlock(NewBB);
+ if (DF)
+ DF->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
+/// rethrowing any previously caught exception. This will crash horribly
+/// at runtime if there is no such exception: using unwind to throw a new
+/// exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwinds() {
+ SmallVector<TerminatorInst*, 16> UnwindInsts;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (isa<UnwindInst>(TI))
+ UnwindInsts.push_back(TI);
+ }
+
+ if (UnwindInsts.empty()) return false;
+
+ // Find the rewind function if we didn't already.
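+ // The rewind function takes the exception pointer as its only argument
+ // and returns void, i.e. it has type void(i8*).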
+ if (!RewindFunction) {
+ LLVMContext &Ctx = UnwindInsts[0]->getContext();
+ std::vector<const Type*>
+ Params(1, Type::getInt8PtrTy(Ctx));
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Params, false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ bool Changed = false;
+
+ for (SmallVectorImpl<TerminatorInst*>::iterator
+ I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) {
+ TerminatorInst *TI = *I;
+
+ // Replace the unwind instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent) followed by an UnreachableInst.
+
+ // Create the call...
+ CallInst *CI = CallInst::Create(RewindFunction,
+ CreateReadOfExceptionValue(TI->getParent()),
+ "", TI);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ // ...followed by an UnreachableInst.
+ new UnreachableInst(TI->getContext(), TI);
+
+ // Nuke the unwind instruction.
+ TI->eraseFromParent();
+ ++NumUnwindsLowered;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with loads from a
+/// stack temporary. Move eh.exception calls inside landing pads to the start
+/// of the landing pad (optional, but may make things simpler for later passes).
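+/// In effect, an eh.exception call outside a landing pad becomes a load of
+/// ExceptionValueVar, which FinishStackTemporaries later initializes with
+/// the real exception value at the start of every landing pad.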
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception) {
+ if (!CI->use_empty()) {
+ Value *ExceptionValue = CreateReadOfExceptionValue(BB);
+ if (CI == ExceptionValue) {
+ // The call was at the start of a landing pad - leave it alone.
+ assert(LandingPads.count(BB) &&
+ "Created eh.exception call outside landing pad!");
+ continue;
+ }
+ CI->replaceAllUsesWith(ExceptionValue);
+ }
+ CI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// FinishStackTemporaries - If we introduced a stack variable to hold the
+/// exception value then initialize it in each landing pad.
+bool DwarfEHPrepare::FinishStackTemporaries() {
+ if (!ExceptionValueVar)
+ // Nothing to do.
+ return false;
+
+ bool Changed = false;
+
+ // Make sure that there is a store of the exception value at the start of
+ // each landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI);
+ Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar);
+ Store->insertAfter(ExceptionValue);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// PromoteStackTemporaries - Turn any stack temporaries we introduced into
+/// registers if possible.
+bool DwarfEHPrepare::PromoteStackTemporaries() {
+ if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
+ // Turn the exception temporary into registers and phi nodes if possible.
+ std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
+ PromoteMemToReg(Allocas, *DT, *DF);
+ return true;
+ }
+ return false;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+/// CreateValueLoad - Insert a load of the exception value stack variable
+/// (creating it if necessary) at the start of the basic block (unless
+/// there already is a load, in which case the existing load is returned).
+Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a load of the exception temporary?
+ if (ExceptionValueVar)
+ if (LoadInst* LI = dyn_cast<LoadInst>(Start))
+ if (LI->getPointerOperand() == ExceptionValueVar)
+ // Reuse the existing load.
+ return Start;
+
+ // Create the temporary if we didn't already.
+ if (!ExceptionValueVar) {
+ ExceptionValueVar = new AllocaInst(PointerType::getUnqual(
+ Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin());
+ ++NumStackTempsIntroduced;
+ }
+
+ // Load the value.
+ return new LoadInst(ExceptionValueVar, "eh.value.load", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ DF = getAnalysisIfAvailable<DominanceFrontier>();
+ ExceptionValueVar = 0;
+ F = &Fn;
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions into libcalls.
+ Changed |= LowerUnwinds();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ // Initialize any stack temporaries we introduced.
+ Changed |= FinishStackTemporaries();
+
+ // Turn any stack temporaries into registers if possible.
+ if (!CompileFast)
+ Changed |= PromoteStackTemporaries();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
new file mode 100644
index 0000000..cb5a8c0
--- /dev/null
+++ b/lib/CodeGen/ELF.h
@@ -0,0 +1,317 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 details are based on the HP/Intel
+// definition of the ELF-64 object file format document, Version 1.5,
+// Draft 2, May 27, 1998.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+ class GlobalValue;
+
+ // Identification Indexes
+ enum {
+ EI_MAG0 = 0,
+ EI_MAG1 = 1,
+ EI_MAG2 = 2,
+ EI_MAG3 = 3
+ };
+
+ // File types
+ enum {
+ ET_NONE = 0, // No file type
+ ET_REL = 1, // Relocatable file
+ ET_EXEC = 2, // Executable file
+ ET_DYN = 3, // Shared object file
+ ET_CORE = 4, // Core file
+ ET_LOPROC = 0xff00, // Beginning of processor-specific codes
+ ET_HIPROC = 0xffff // Processor-specific
+ };
+
+ // Versioning
+ enum {
+ EV_NONE = 0,
+ EV_CURRENT = 1
+ };
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+
+ // An ELF symbol may originate from one of the two LLVM entities below;
+ // for the other kinds (section, file, func) a null pointer is assumed
+ // by default.
+ union {
+ const GlobalValue *GV; // If this is a pointer to a GV
+ const char *Ext; // If this is a pointer to a named symbol
+ } Source;
+
+ // Describes which source type this ELF symbol comes from: a
+ // GlobalValue, an ExternalSymbol, or neither.
+ enum {
+ isGV, // The Source.GV field is valid.
+ isExtSym, // The Source.ExtSym field is valid.
+ isOther // Not a GlobalValue or External Symbol
+ };
+ unsigned SourceType;
+
+ bool isGlobalValue() const { return SourceType == isGV; }
+ bool isExternalSym() const { return SourceType == isExtSym; }
+
+ // getGlobalValue - If this ELF symbol originated from a global value,
+ // return a reference to it.
+ const GlobalValue *getGlobalValue() const {
+ assert(SourceType == isGV && "This is not a global value");
+ return Source.GV;
+ }
+
+ // getExternalSymbol - If this ELF symbol originated from an external
+ // symbol, return a reference to it.
+ const char *getExternalSymbol() const {
+ assert(SourceType == isExtSym && "This is not an external symbol");
+ return Source.Ext;
+ }
+
+ // getGV - From a global value, return an ELF symbol to represent it.
+ static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
+ unsigned Type, unsigned Visibility) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(Type);
+ Sym->setVisibility(Visibility);
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // getExtSym - Create and return an ELF symbol to represent an
+ // external symbol.
+ static ELFSym *getExtSym(const char *Ext) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.Ext = Ext;
+ Sym->setBind(STB_GLOBAL);
+ Sym->setType(STT_NOTYPE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SourceType = isExtSym;
+ return Sym;
+ }
+
+ // getSectionSym - Returns an ELF symbol to represent an ELF section.
+ static ELFSym *getSectionSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(STB_LOCAL);
+ Sym->setType(STT_SECTION);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getFileSym - Returns an ELF symbol to represent the module identifier.
+ static ELFSym *getFileSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(STB_LOCAL);
+ Sym->setType(STT_FILE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getUndefGV - Returns a STT_NOTYPE symbol
+ static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(STT_NOTYPE);
+ Sym->setVisibility(STV_DEFAULT);
+ Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // ELF specific fields
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ uint8_t Info;
+ uint8_t Other;
+ unsigned short SectionIdx;
+
+ // Symbol index into the Symbol table
+ unsigned SymTabIdx;
+
+ enum {
+ STB_LOCAL = 0, // Local sym, not visible outside obj file containing def
+ STB_GLOBAL = 1, // Global sym, visible to all object files being combined
+ STB_WEAK = 2 // Weak symbol, like global but lower-precedence
+ };
+
+ enum {
+ STT_NOTYPE = 0, // Symbol's type is not specified
+ STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
+ STT_FUNC = 2, // Symbol is executable code (function, etc.)
+ STT_SECTION = 3, // Symbol refers to a section
+ STT_FILE = 4 // Local, absolute symbol that refers to a file
+ };
+
+ enum {
+ STV_DEFAULT = 0, // Visibility is specified by binding type
+ STV_INTERNAL = 1, // Defined by processor supplements
+ STV_HIDDEN = 2, // Not visible to other components
+ STV_PROTECTED = 3 // Visible in other components but not preemptable
+ };
+
+ ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
+ Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0),
+ SymTabIdx(0) {}
+
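+ // The accessors below follow the standard ELF st_info encoding: the
+ // high nibble of Info holds the binding and the low nibble holds the
+ // type, i.e. st_info == (bind << 4) | (type & 0xf).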
+ unsigned getBind() const { return (Info >> 4) & 0xf; }
+ unsigned getType() const { return Info & 0xf; }
+ bool isLocalBind() const { return getBind() == STB_LOCAL; }
+ bool isFileType() const { return getType() == STT_FILE; }
+
+ void setBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+
+ void setType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+
+ void setVisibility(unsigned V) {
+ assert(V == (V & 0x3) && "Visibility value out of range!");
+ Other = V;
+ }
+ };
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ class ELFSection : public BinaryObject {
+ public:
+ // ELF specific fields
+ unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
+ unsigned Type; // sh_type - Section contents & semantics
+ unsigned Flags; // sh_flags - Section flags.
+ uint64_t Addr; // sh_addr - The mem addr this section is in.
+ unsigned Offset; // sh_offset - Offset from the file start
+ unsigned Size; // sh_size - The section size.
+ unsigned Link; // sh_link - Section header table index link.
+ unsigned Info; // sh_info - Auxiliary information.
+ unsigned Align; // sh_addralign - Alignment of section.
+ unsigned EntSize; // sh_entsize - Size of entries in the section.
+
+ // Section Header Flags
+ enum {
+ SHF_WRITE = 1 << 0, // Writable
+ SHF_ALLOC = 1 << 1, // Mapped into the process addr space
+ SHF_EXECINSTR = 1 << 2, // Executable
+ SHF_MERGE = 1 << 4, // Might be merged if equal
+ SHF_STRINGS = 1 << 5, // Contains null-terminated strings
+ SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
+ SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
+ SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+ SHF_GROUP = 1 << 9, // Section is a member of a group
+ SHF_TLS = 1 << 10 // Section holds thread-local data
+ };
+
+ // Section Types
+ enum {
+ SHT_NULL = 0, // No associated section (inactive entry).
+ SHT_PROGBITS = 1, // Program-defined contents.
+ SHT_SYMTAB = 2, // Symbol table.
+ SHT_STRTAB = 3, // String table.
+ SHT_RELA = 4, // Relocation entries; explicit addends.
+ SHT_HASH = 5, // Symbol hash table.
+ SHT_DYNAMIC = 6, // Information for dynamic linking.
+ SHT_NOTE = 7, // Information about the file.
+ SHT_NOBITS = 8, // Data occupies no space in the file.
+ SHT_REL = 9, // Relocation entries; no explicit addends.
+ SHT_SHLIB = 10, // Reserved.
+ SHT_DYNSYM = 11, // Symbol table.
+ SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
+ SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
+ SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
+ SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
+ };
+
+ // Special section indices.
+ enum {
+ SHN_UNDEF = 0, // Undefined, missing, irrelevant
+ SHN_LORESERVE = 0xff00, // Lowest reserved index
+ SHN_LOPROC = 0xff00, // Lowest processor-specific index
+ SHN_HIPROC = 0xff1f, // Highest processor-specific index
+ SHN_ABS = 0xfff1, // Symbol has absolute value; no relocation
+ SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
+ SHN_HIRESERVE = 0xffff // Highest reserved index
+ };
+
+ /// SectionIdx - The number of the section in the Section Table.
+ unsigned short SectionIdx;
+
+ /// Sym - The symbol to represent this section if it has one.
+ ELFSym *Sym;
+
+ /// getSymbolTableIndex - Returns the symbol table index of the symbol
+ /// representing this section.
+ unsigned getSymbolTableIndex() const {
+ assert(Sym && "section not present in the symbol table");
+ return Sym->SymTabIdx;
+ }
+
+ ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+ : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+ Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
+ };
+
+ /// ELFRelocation - This class contains all the information necessary
+ /// to generate any 32-bit or 64-bit ELF relocation entry.
+ class ELFRelocation {
+ uint64_t r_offset; // offset in the section of the object this applies to
+ uint32_t r_symidx; // symbol table index of the symbol to use
+ uint32_t r_type; // machine specific relocation type
+ int64_t r_add; // explicit relocation addend
+ bool r_rela; // if true then the addend is part of the entry
+ // otherwise the addend is at the location specified
+ // by r_offset
+ public:
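+ // getInfo - Compose the r_info field. This matches the standard
+ // ELF32_R_INFO ((sym << 8) + type) and ELF64_R_INFO
+ // (((uint64_t)sym << 32) + type) encodings.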
+ uint64_t getInfo(bool is64Bit) const {
+ if (is64Bit)
+ return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
+ else
+ return (r_symidx << 8) + (r_type & 0xFFL);
+ }
+
+ uint64_t getOffset() const { return r_offset; }
+ int64_t getAddend() const { return r_add; }
+
+ ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
+ bool rela = true, int64_t addend = 0) :
+ r_offset(off), r_symidx(sym), r_type(type),
+ r_add(addend), r_rela(rela) {}
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
new file mode 100644
index 0000000..8416d3b
--- /dev/null
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -0,0 +1,205 @@
+//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfce"
+
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "processing function: "
+ << MF.getFunction()->getName() << "\n");
+
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getTextSection(MF.getFunction());
+
+ // Set the desired binary object to be used by the code emitters
+ setBinaryObject(ES);
+
+ // Get the function alignment in bytes
+ unsigned Align = (1 << MF.getAlignment());
+
+ // The function must start on its required alignment
+ ES->emitAlignment(Align);
+
+ // Update the section alignment if needed.
+ ES->Align = std::max(ES->Align, Align);
+
+ // Record the function start offset
+ FnStartOff = ES->getCurrentPCOffset();
+
+ // Emit constant pool and jump tables to their appropriate sections.
+ // They need to be emitted before the function because on some targets
+ // the latter may reference a JT or CP entry address.
+ emitConstantPool(MF.getConstantPool());
+ if (MF.getJumpTableInfo())
+ emitJumpTables(MF.getJumpTableInfo());
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Add a symbol to represent the function.
+ const Function *F = MF.getFunction();
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC,
+ EW.getGlobalELFVisibility(F));
+ FnSym->SectionIdx = ES->SectionIdx;
+ FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+ EW.AddPendingGlobalSymbol(F, true);
+
+ // Offset from start of Section
+ FnSym->Value = FnStartOff;
+
+ if (!F->hasPrivateLinkage())
+ EW.SymbolList.push_back(FnSym);
+
+ // Patch up Jump Table Section relocations to use the real MBBs offsets
+ // now that the MBB label offsets inside the function are known.
+ if (MF.getJumpTableInfo()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+ MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+ unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setResultPointer((void*)MBBOffset);
+ MR.setConstantVal(ES->SectionIdx);
+ JTSection.addRelocation(MR);
+ }
+ }
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pools entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses later
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+ if (MR.isGlobalValue()) {
+ EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+ } else if (MR.isExternalSymbol()) {
+ EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+ } else if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(ES->SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(JTSection.SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else {
+ llvm_unreachable("Unhandled relocation type");
+ }
+ ES->addRelocation(MR);
+ }
+
+ // Clear per-function data structures.
+ JTRelocations.clear();
+ Relocations.clear();
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // TODO: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf constant pools!");
+
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+
+ // Record the constant pool location and the section index
+ ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+ CPLocations.push_back(CstPool.size());
+ CPSections.push_back(CstPool.SectionIdx);
+
+ if (CPE.isMachineConstantPoolEntry())
+ llvm_unreachable("CPE.isMachineConstantPoolEntry not supported yet");
+
+ // Emit the constant to constant pool section
+ EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf jump tables!");
+
+ const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+ unsigned EntrySize = 4; // FIXME: use MJTI->getEntrySize();
+
+ // Get the ELF Section to emit the jump table
+ ELFSection &JTSection = EW.getJumpTableSection();
+
+ // For each JT, record its offset from the start of the section
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+ // Record JT 'i' offset in the JT section
+ JTLocations.push_back(JTSection.size());
+
+ // Each MBB entry in the Jump table section has a relocation entry
+ // against the current text section.
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+ MachineRelocation MR =
+ MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+ // Add the relocation to the Jump Table section
+ JTRelocations.push_back(MR);
+
+ // Output placeholder for MBB in the JT section
+ for (unsigned s=0; s < EntrySize; ++s)
+ JTSection.emitByte(0);
+ }
+ }
+}
+
+} // end namespace llvm
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 0000000..b5e9c84
--- /dev/null
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+ class ELFWriter;
+ class ELFSection;
+
+ /// ELFCodeEmitter - This class is used by the ELFWriter to
+ /// emit the code for functions to the ELF file.
+ class ELFCodeEmitter : public ObjectCodeEmitter {
+ ELFWriter &EW;
+
+ /// Target machine description
+ TargetMachine &TM;
+
+ /// Section containing code for functions
+ ELFSection *ES;
+
+ /// Relocations - Record relocations needed by the current function
+ std::vector<MachineRelocation> Relocations;
+
+ /// JTRelocations - Record relocations needed by the jump table
+ /// section.
+ std::vector<MachineRelocation> JTRelocations;
+
+ /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+ uintptr_t FnStartOff;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+ /// addRelocation - Register new relocations for this function
+ void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ /// emitConstantPool - For each constant pool entry, figure out which
+ /// section the constant should live in and emit data to it
+ void emitConstantPool(MachineConstantPool *MCP);
+
+ /// emitJumpTables - Emit all the jump tables for a given jump table
+ /// info and record them to the appropriate section.
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ /// emitLabel - Emits a label
+ virtual void emitLabel(uint64_t LabelID) {
+ assert("emitLabel not implemented");
+ }
+
+ /// getLabelAddress - Return the address of the specified LabelID,
+ /// only usable after the LabelID has been emitted.
+ virtual uintptr_t getLabelAddress(uint64_t Label) const {
+ assert("getLabelAddress not implemented");
+ return 0;
+ }
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..0979c04
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1092 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(&ID), O(o), TM(tm),
+ OutContext(*new MCContext()),
+ TLOF(TM.getTargetLowering()->getObjFileLowering()),
+ is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+ isLittleEndian(TM.getTargetData()->isLittleEndian()),
+ ElfHdr(isLittleEndian, is64Bit) {
+
+ MAI = TM.getMCAsmInfo();
+ TEW = TM.getELFWriterInfo();
+
+ // Create the object code emitter object for this target.
+ ElfCE = new ELFCodeEmitter(*this);
+
+ // Initial number of sections
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete ElfCE;
+ delete &OutContext;
+
+ while(!SymbolList.empty()) {
+ delete SymbolList.back();
+ SymbolList.pop_back();
+ }
+
+ while(!PrivateSyms.empty()) {
+ delete PrivateSyms.back();
+ PrivateSyms.pop_back();
+ }
+
+ while(!SectionList.empty()) {
+ delete SectionList.back();
+ SectionList.pop_back();
+ }
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+ Mang = new Mangler(*MAI);
+
+ // ELF Header
+ // ----------
+ // The e_shnum and e_shstrndx fields are only known after all sections
+ // have been emitted. Their locations in the output buffer are recorded
+ // so they can be patched up later.
+ //
+ // Note
+ // ----
+ // The emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
+
+ ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+ ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
+ ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
+ ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
+
+ ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+ ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
+ ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
+
+ ElfHdr.emitWord16(ET_REL); // e_type
+ ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+ ElfHdr.emitWord32(EV_CURRENT); // e_version
+ ElfHdr.emitWord(0); // e_entry, no entry point in .o file
+ ElfHdr.emitWord(0); // e_phoff, no program header for .o
+ ELFHdr_e_shoff_Offset = ElfHdr.size();
+ ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
+ ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
+ ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
+ ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
+ ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ ElfHdr.emitWord16(TEW->getSHdrSize());
+
+ // e_shnum = # of section header ents
+ ELFHdr_e_shnum_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // e_shstrndx = Section # of '.shstrtab'
+ ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // Add the null section, which is required to be first in the file.
+ getNullSection();
+
+ // The first entry in the symtab is the null symbol and the second
+ // is a local symbol containing the module/file name
+ SymbolList.push_back(new ELFSym());
+ SymbolList.push_back(ELFSym::getFileSym());
+
+ return false;
+}
+
+// AddPendingGlobalSymbol - Add a global to be processed and, optionally,
+// to the global symbol lookup; a zero index is used because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup /* = false */) {
+ PendingGlobals.insert(GV);
+ if (AddToLookup)
+ GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed and to
+// the external symbol lookup; a zero index is used because the symbol
+// table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) {
+ PendingExternals.insert(External);
+ ExtSymLookup[External] = 0;
+}
+
+ELFSection &ELFWriter::getDataSection() {
+ const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
+ return getSection(Data->getSectionName(), Data->getType(),
+ Data->getFlags(), 4);
+}
+
+ELFSection &ELFWriter::getBSSSection() {
+ const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
+ return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
+}
+
+// getCtorSection - Get the static constructor section
+ELFSection &ELFWriter::getCtorSection() {
+ const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
+ return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
+}
+
+// getDtorSection - Get the static destructor section
+ELFSection &ELFWriter::getDtorSection() {
+ const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
+ return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
+}
+
+// getTextSection - Get the text section for the specified function
+ELFSection &ELFWriter::getTextSection(Function *F) {
+ const MCSectionELF *Text =
+ (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
+ return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
+}
+
+// getJumpTableSection - Get a read-only section for constants when
+// emitting jump tables. TODO: add PIC support
+ELFSection &ELFWriter::getJumpTableSection() {
+ const MCSectionELF *JT =
+ (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
+ return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
+ TM.getTargetData()->getPointerABIAlignment());
+}
+
+// getConstantPoolSection - Get a constant pool section based on the machine
+// constant pool entry type and relocation info.
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSectionELF *CPSect =
+ (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
+ return getSection(CPSect->getSectionName(), CPSect->getType(),
+ CPSect->getFlags(), CPE.getAlignment());
+}
+
+// getRelocSection - Return the relocation section of section 'S'. 'RelA'
+// is true if the relocation section contains entries with addends.
+ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
+ unsigned SectionType = TEW->hasRelocationAddend() ?
+ ELFSection::SHT_RELA : ELFSection::SHT_REL;
+
+ std::string SectionName(".rel");
+ if (TEW->hasRelocationAddend())
+ SectionName.append("a");
+ SectionName.append(S.getName());
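+ // e.g. relocations for ".text" land in ".rel.text", or in ".rela.text"
+ // when the target stores addends in the entries (SHT_RELA).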
+
+ return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default:
+ llvm_unreachable("unknown visibility type");
+ case GlobalValue::DefaultVisibility:
+ return ELFSym::STV_DEFAULT;
+ case GlobalValue::HiddenVisibility:
+ return ELFSym::STV_HIDDEN;
+ case GlobalValue::ProtectedVisibility:
+ return ELFSym::STV_PROTECTED;
+ }
+ return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+ if (GV->hasInternalLinkage())
+ return ELFSym::STB_LOCAL;
+
+ if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+ return ELFSym::STB_WEAK;
+
+ return ELFSym::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+ if (GV->isDeclaration())
+ return ELFSym::STT_NOTYPE;
+
+ if (isa<Function>(GV))
+ return ELFSym::STT_FUNC;
+
+ return ELFSym::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+// no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+ return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+// AddToSymbolList - Update the symbol lookup and, if the symbol is
+// private, add it to the PrivateSyms list; otherwise add it to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+ assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+ const GlobalValue *GV = GblSym->getGlobalValue();
+ if (GV->hasPrivateLinkage()) {
+ // For private symbols, keep track of the index inside the private
+ // list, since they never go to the symbol table and won't be
+ // patched up later.
+ PrivateSyms.push_back(GblSym);
+ GblSymLookup[GV] = PrivateSyms.size()-1;
+ } else {
+ // Non-private symbols are left with zero indices until they are
+ // patched up during symbol table emission (where the indices are
+ // assigned).
+ SymbolList.push_back(GblSym);
+ GblSymLookup[GV] = 0;
+ }
+}
+
+// EmitGlobal - Choose the right section for global and emit it
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+ // Check if the referenced symbol is already emitted
+ if (GblSymLookup.find(GV) != GblSymLookup.end())
+ return;
+
+ // Handle ELF Bind, Visibility and Type for the current symbol
+ unsigned SymBind = getGlobalELFBinding(GV);
+ unsigned SymType = getGlobalELFType(GV);
+ bool IsUndefSym = IsELFUndefSym(GV);
+
+ ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+ : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+ if (!IsUndefSym) {
+ assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ // Handle special llvm globals
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ // Get the ELF section where this global belongs from TLOF
+ const MCSectionELF *S =
+ (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+ ELFSection &ES =
+ getSection(S->getSectionName(), S->getType(), S->getFlags());
+ SectionKind Kind = S->getKind();
+
+ // The symbol's alignment should update the section alignment if needed
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPreferredAlignment(GVar);
+ unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+ GblSym->Size = Size;
+
+ if (S->HasCommonSymbols()) { // Symbol must go to a common section
+ GblSym->SectionIdx = ELFSection::SHN_COMMON;
+
+ // A new linkonce section is created for each global in the
+ // common section; the default alignment is 1 and the symbol
+ // value contains its alignment.
+ ES.Align = 1;
+ GblSym->Value = Align;
+
+ } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // Round the size up to the alignment so that the next object
+ // starts at the right offset in the section.
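+ // The expression below rounds up because Align is a power of two:
+ // e.g. Size = 13, Align = 8 gives (13 + 7) & ~7 == 16.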
+ if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+ ES.Align = std::max(ES.Align, Align);
+
+ // GblSym->Value should contain the virtual offset inside the section.
+ // Virtual because the BSS space is not allocated in ELF object files
+ GblSym->Value = ES.Size;
+ ES.Size += Size;
+
+ } else { // The symbol must go to some kind of data section
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // GblSym->Value should contain the symbol offset inside the section,
+ // and all symbols should start on their required alignment boundary
+ ES.Align = std::max(ES.Align, Align);
+ ES.emitAlignment(Align);
+ GblSym->Value = ES.size();
+
+ // Emit the global to the data section 'ES'
+ EmitGlobalConstant(GVar->getInitializer(), ES);
+ }
+ }
+
+ AddToSymbolList(GblSym);
+}
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS) {
+
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
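+ // For example, for { i8, i32 } on a target where i32 is 4-byte
+ // aligned, field 0 has fieldSize 1 and field 1 starts at offset 4,
+ // so padSize for field 0 is (4 - 0) - 1 == 3.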
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, GblS);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ GblS.emitZeros(padSize);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), GblS);
+ return;
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ GblS.emitZeros(Size);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, GblS);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ APInt Val = CFP->getValueAPF().bitcastToAPInt();
+ if (CFP->getType()->isDoubleTy())
+ GblS.emitWord64(Val.getZExtValue());
+ else if (CFP->getType()->isFloatTy())
+ GblS.emitWord32(Val.getZExtValue());
+ else if (CFP->getType()->isX86_FP80Ty()) {
+ unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+ TD->getTypeStoreSize(CFP->getType());
+ GblS.emitWordFP80(Val.getRawData(), PadSize);
+ } else if (CFP->getType()->isPPC_FP128Ty())
+ llvm_unreachable("PPC_FP128Ty global emission not implemented");
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (Size == 1)
+ GblS.emitByte(CI->getZExtValue());
+ else if (Size == 2)
+ GblS.emitWord16(CI->getZExtValue());
+ else if (Size == 4)
+ GblS.emitWord32(CI->getZExtValue());
+ else
+ EmitGlobalConstantLargeInt(CI, GblS);
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ const VectorType *PTy = CP->getType();
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I), GblS);
+ return;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Resolve a constant expression which returns a (Constant, Offset)
+ // pair. If 'Res.first' is a GlobalValue, emit a relocation with
+ // the offset 'Res.second'; otherwise emit the resolved constant as
+ // is always done for non-constant-expression types.
+ CstExprResTy Res = ResolveConstantExpr(CE);
+ const Constant *Op = Res.first;
+
+ if (isa<GlobalValue>(Op))
+ EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+ TD->getTypeAllocSize(Op->getType()),
+ GblS, Res.second);
+ else
+ EmitGlobalConstant(Op, GblS);
+
+ return;
+ } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+ // Fill the data entry with zeros or emit a relocation entry
+ if (isa<ConstantPointerNull>(CV))
+ GblS.emitZeros(Size);
+ else
+ EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+ Size, GblS);
+ return;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function and
+ // therefore must be referenced using a relocation entry.
+ EmitGlobalDataRelocation(GV, Size, GblS);
+ return;
+ }
+
+ std::string msg;
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << "Constant unimp for type: " << *CV->getType();
+ llvm_report_error(ErrorMsg.str());
+}
+
+// ResolveConstantExpr - Resolve the constant expression until it stops
+// yielding other constant expressions.
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Base case: no more nested constant expressions to resolve.
+ if (!isa<ConstantExpr>(CV))
+ return std::make_pair(CV, 0);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ return ResolveConstantExpr(CE->getOperand(0));
+
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size());
+ return std::make_pair(ptrVal, Offset);
+ }
+ case Instruction::IntToPtr: {
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return ResolveConstantExpr(Op);
+ }
+ case Instruction::PtrToInt: {
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot of the same size as the pointer.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+ return ResolveConstantExpr(Op);
+
+ llvm_unreachable("Integer size less then pointer size");
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // Only handle cases where there's a constant expression with GlobalValue
+ // as first operand and ConstantInt as second, which are the cases we can
+ // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+ // 1) Instruction::Add => (global) + CstInt
+ // 2) Instruction::Sub => (global) + -CstInt
+ const Constant *Op0 = CE->getOperand(0);
+ const Constant *Op1 = CE->getOperand(1);
+ assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+ CstExprResTy Res = ResolveConstantExpr(Op0);
+ assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+ const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ return std::make_pair(Res.first, RHS.getSExtValue());
+ case Instruction::Sub:
+ return std::make_pair(Res.first, (-RHS).getSExtValue());
+ }
+ }
+ }
+
+ std::string msg(CE->getOpcodeName());
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << ": Unsupported ConstantExpr type";
+ llvm_report_error(ErrorMsg.str());
+
+ return std::make_pair(CV, 0); // silence warning
+}
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset) {
+ // Create the relocation entry for the global value
+ MachineRelocation MR =
+ MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+ TEW->getAbsoluteLabelMachineRelTy(),
+ const_cast<GlobalValue*>(GV),
+ Offset);
+
+ // Fill the data entry with zeros
+ GblS.emitZeros(Size);
+
+ // Add the relocation entry for the current data section
+ GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ ELFSection &S) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ const uint64_t *RawData = CI->getValue().getRawData();
+ uint64_t Val = 0;
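+ // APInt stores its words least-significant first, so e.g. an i128 is
+ // emitted as two 64-bit words: low word first on little-endian targets,
+ // high word first on big-endian ones.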
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+ S.emitWord64(Val);
+ }
+}
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used")
+ llvm_unreachable("not implemented yet");
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPointerPrefAlignment();
+ if (GV->getName() == "llvm.global_ctors") {
+ ELFSection &Ctor = getCtorSection();
+ Ctor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Ctor);
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ ELFSection &Dtor = getDtorSection();
+ Dtor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Dtor);
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just emits the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (!isa<ConstantArray>(List)) return;
+ ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, stop emitting.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1), Xtor);
+ }
+}
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the ElfCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Emit .data section placeholder
+ getDataSection();
+
+ // Emit .bss section placeholder
+ getBSSSection();
+
+ // Build and emit data, bss and "common" sections.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit all pending globals
+ for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+ I != E; ++I)
+ EmitGlobal(*I);
+
+ // Emit all pending externals
+ for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+ I != E; ++I)
+ SymbolList.push_back(ELFSym::getExtSym(*I));
+
+ // Emit a symbol for each section created until now; skip the null section
+ for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ ELFSym *SectionSym = ELFSym::getSectionSym();
+ SectionSym->SectionIdx = ES.SectionIdx;
+ SymbolList.push_back(SectionSym);
+ ES.Sym = SymbolList.back();
+ }
+
+ // Emit string table
+ EmitStringTable(M.getModuleIdentifier());
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // Emit the relocation sections.
+ EmitRelocations();
+
+ // Emit the sections string table.
+ EmitSectionTableStringTable();
+
+ // Dump the sections and section table to the .o file.
+ OutputSectionsAndSectionTable();
+
+ return false;
+}
+
+// RelocateField - Patch the relocatable field at 'Offset' in 'BO' with
+// 'Value', where 'Size' is the field size in bits.
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+ int64_t Value, unsigned Size) {
+ if (Size == 32)
+ BO.fixWord32(Value, Offset);
+ else if (Size == 64)
+ BO.fixWord64(Value, Offset);
+ else
+ llvm_unreachable("don't know howto patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+ // True if the target uses the relocation entry to hold the addend,
+ // otherwise the addend is written directly to the relocatable field.
+ bool HasRelA = TEW->hasRelocationAddend();
+
+ // Create Relocation sections for each section which needs it.
+ for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+ ELFSection &S = *SectionList[i];
+
+ // This section does not have relocations
+ if (!S.hasRelocations()) continue;
+ ELFSection &RelSec = getRelocSection(S);
+
+ // 'Link' - Section hdr idx of the associated symbol table
+ // 'Info' - Section hdr idx of the section to which the relocation applies
+ ELFSection &SymTab = getSymbolTableSection();
+ RelSec.Link = SymTab.SectionIdx;
+ RelSec.Info = S.SectionIdx;
+ RelSec.EntSize = TEW->getRelocationEntrySize();
+
+ // Get the relocations from Section
+ std::vector<MachineRelocation> Relos = S.getRelocations();
+ for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+ MRE = Relos.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+
+ // Relocatable field offset from the section start
+ unsigned RelOffset = MR.getMachineCodeOffset();
+
+ // Symbol index in the symbol table
+ unsigned SymIdx = 0;
+
+ // Target specific relocation field type and size
+ unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+ unsigned RelTySize = TEW->getRelocationTySize(RelType);
+ int64_t Addend = 0;
+
+ // There are several machine relocation types, and each one needs a
+ // different approach to retrieve the symbol table index.
+ if (MR.isGlobalValue()) {
+ const GlobalValue *G = MR.getGlobalValue();
+ int64_t GlobalOffset = MR.getConstantVal();
+ SymIdx = GblSymLookup[G];
+ if (G->hasPrivateLinkage()) {
+ // If the target uses a section offset in the relocation:
+ // SymIdx + Addend = section sym for global + section offset
+ unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+ Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+ } else {
+ Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+ }
+ } else if (MR.isExternalSymbol()) {
+ const char *ExtSym = MR.getExternalSymbol();
+ SymIdx = ExtSymLookup[ExtSym];
+ Addend = TEW->getDefaultAddendForRelTy(RelType);
+ } else {
+ // Get the symbol index for the section symbol
+ unsigned SectionIdx = MR.getConstantVal();
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+ // The symbol offset inside the section
+ int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+ // For PC-relative relocations where the symbol is defined in the same
+ // section in which it is referenced, ignore the relocation entry and
+ // patch the relocatable field with the symbol offset directly.
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+ int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+ RelocateField(S, RelOffset, Value, RelTySize);
+ continue;
+ }
+
+ Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
+ }
+
+ // For targets that do not store the addend in the relocation entry,
+ // the relocatable field itself must be patched to contain the addend;
+ // otherwise write zeros to make sure there is no garbage there.
+ RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
+ // Get the relocation entry and emit to the relocation section
+ ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
+ EmitRelocation(RelSec, Rel, HasRelA);
+ }
+ }
+}
+
+/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
+void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
+ bool HasRelA) {
+ RelSec.emitWord(Rel.getOffset());
+ RelSec.emitWord(Rel.getInfo(is64Bit));
+ if (HasRelA)
+ RelSec.emitWord(Rel.getAddend());
+}
+
+/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
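+/// The two layouts emit the same fields in different orders: Elf64_Sym
+/// places st_info/st_other/st_shndx right after st_name, while Elf32_Sym
+/// places them after st_value and st_size, per the ELF specification.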
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
+ if (is64Bit) {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ SymbolTable.emitWord64(Sym.Value);
+ SymbolTable.emitWord64(Sym.Size);
+ } else {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitWord32(Sym.Value);
+ SymbolTable.emitWord32(Sym.Size);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ }
+}
+
+/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab'
+/// Section Header Table
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+ const ELFSection &SHdr) {
+ SHdrTab.emitWord32(SHdr.NameIdx);
+ SHdrTab.emitWord32(SHdr.Type);
+ if (is64Bit) {
+ SHdrTab.emitWord64(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord64(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord64(SHdr.Align);
+ SHdrTab.emitWord64(SHdr.EntSize);
+ } else {
+ SHdrTab.emitWord32(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord32(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord32(SHdr.Align);
+ SHdrTab.emitWord32(SHdr.EntSize);
+ }
+}
+
+/// EmitStringTable - If the current symbol table is non-empty, emit the string
+/// table for it
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
+ if (!SymbolList.size()) return; // Empty symbol table.
+ ELFSection &StrTab = getStringTableSection();
+
+ // The string table starts with a required null byte at index zero.
+ StrTab.emitByte(0);
+
+ // Walk on the symbol list and write symbol names into the string table.
+ unsigned Index = 1;
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ ELFSym &Sym = *(*I);
+
+ std::string Name;
+ if (Sym.isGlobalValue()) {
+ SmallString<40> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
+ Name.append(NameStr.begin(), NameStr.end());
+ } else if (Sym.isExternalSym())
+ Name.append(Sym.getExternalSymbol());
+ else if (Sym.isFileType())
+ Name.append(ModuleName);
+
+ if (Name.empty()) {
+ Sym.NameIdx = 0;
+ } else {
+ Sym.NameIdx = Index;
+ StrTab.emitString(Name);
+
+ // Keep track of the number of bytes emitted to this section.
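+ // (The +1 accounts for the NUL terminator that each emitted string
+ // carries, which is what keeps Index in sync with StrTab.size().)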
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTab.size());
+ StrTab.Size = Index;
+}
+
+// SortSymbols - In the symbol table, local symbols must come before
+// all symbols with non-local bindings. The return value is the
+// position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() {
+ unsigned FirstNonLocalSymbol;
+ std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ if ((*I)->isLocalBind())
+ LocalSyms.push_back(*I);
+ else
+ OtherSyms.push_back(*I);
+ }
+ SymbolList.clear();
+ FirstNonLocalSymbol = LocalSyms.size();
+
+ for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+ SymbolList.push_back(LocalSyms[i]);
+
+ for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+ SymbolList.push_back(*I);
+
+ LocalSyms.clear();
+ OtherSyms.clear();
+
+ return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (!SymbolList.size()) return; // Empty symbol table.
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSymbolTableSection();
+ SymTab.Align = TEW->getPrefELFAlignment();
+
+ // Section Index of .strtab.
+ SymTab.Link = getStringTableSection().SectionIdx;
+
+ // Size of each symtab entry.
+ SymTab.EntSize = TEW->getSymTabEntrySize();
+
+ // Reorder the symbol table with local symbols first!
+ unsigned FirstNonLocalSymbol = SortSymbols();
+
+ // Emit all the symbols to the symbol table.
+ for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+ ELFSym &Sym = *SymbolList[i];
+
+ // Emit symbol to the symbol table
+ EmitSymbol(SymTab, Sym);
+
+ // Record the symbol table index for each symbol
+ if (Sym.isGlobalValue())
+ GblSymLookup[Sym.getGlobalValue()] = i;
+ else if (Sym.isExternalSym())
+ ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+ // Keep track of the symbol's index in the symbol table
+ Sym.SymTabIdx = i;
+ }
+
+ // One greater than the symbol table index of the last local symbol
+ SymTab.Info = FirstNonLocalSymbol;
+ SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ S.NameIdx = Index;
+ SHStrTab.emitString(S.getName());
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += S.getName().size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == SHStrTab.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = ElfHdr.size(); // File header first.
+
+ // Compute each section's offset, aligning as needed; skip the null section.
+ for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ if (!ES.size()) {
+ ES.Offset = FileOff;
+ continue;
+ }
+
+ // Update Section size
+ if (!ES.Size)
+ ES.Size = ES.size();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (ES.Align)
+ FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
+
+ ES.Offset = FileOff;
+ FileOff += ES.Size;
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = TEW->getPrefELFAlignment();
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+ FileOff = ElfHdr.size();
+
+ // Section Header Table blob
+ BinaryObject SHdrTable(isLittleEndian, is64Bit);
+
+ // Emit all of the sections to the file and build the section header table.
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ << ", Size: " << S.Size << ", Offset: " << S.Offset
+ << ", SectionData Size: " << S.size() << "\n");
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.size()) {
+ if (S.Align) {
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+ }
+ O.write((char *)&S.getData()[0], S.Size);
+ FileOff += S.Size;
+ }
+
+ EmitSectionHeader(SHdrTable, S);
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
+}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..b61b484
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,250 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+ class BinaryObject;
+ class Constant;
+ class ConstantInt;
+ class ConstantStruct;
+ class ELFCodeEmitter;
+ class ELFRelocation;
+ class ELFSection;
+ struct ELFSym;
+ class GlobalVariable;
+ class JITDebugRegisterer;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachineConstantPoolEntry;
+ class ObjectCodeEmitter;
+ class MCAsmInfo;
+ class TargetELFWriterInfo;
+ class TargetLoweringObjectFile;
+ class raw_ostream;
+ class SectionKind;
+ class MCContext;
+
+ typedef std::vector<ELFSym*>::iterator ELFSymIter;
+ typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+ typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+ typedef SetVector<const char *>::const_iterator PendingExtsIter;
+ typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ friend class JITDebugRegisterer;
+ public:
+ static char ID;
+
+ /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ raw_ostream &O;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// Context object for machine code objects.
+ MCContext &OutContext;
+
+ /// Target Elf Writer description.
+ const TargetELFWriterInfo *TEW;
+
+ /// Mang - The object used to perform name mangling for this module.
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *ElfCE;
+
+ /// TLOF - Target Lowering Object File, provides section names for globals
+ /// and other object-file-specific information.
+ const TargetLoweringObjectFile &TLOF;
+
+ /// MAI - Target Asm Info, provides information about section names for
+ /// globals and other target-specific details.
+ const MCAsmInfo *MAI;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //===------------------------------------------------------------------===//
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ /// Blob containing the Elf header
+ BinaryObject ElfHdr;
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::vector<ELFSection*> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to the section
+ /// itself, used to quickly look up a section by name.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// PendingGlobals - Globals not processed as symbols yet.
+ SetVector<const GlobalValue*> PendingGlobals;
+
+ /// GblSymLookup - This is a mapping from global value to a symbol index
+ /// in the symbol table or private symbols list. This is useful since reloc
+ /// symbol references must be quickly mapped to their indices in the lists.
+ std::map<const GlobalValue*, uint32_t> GblSymLookup;
+
+ /// PendingExternals - Externals not processed as symbols yet.
+ SetVector<const char *> PendingExternals;
+
+ /// ExtSymLookup - This is a mapping from externals to a symbol index
+ /// in the symbol table list. This is useful since reloc symbol references
+ /// must be quickly mapped to their symbol table indices.
+ std::map<const char *, uint32_t> ExtSymLookup;
+
+ /// SymbolList - This is the list of symbols emitted to the symbol table.
+ /// When the SymbolList is finally built, local symbols must be placed at
+ /// the beginning and non-locals at the end.
+ std::vector<ELFSym*> SymbolList;
+
+ /// PrivateSyms - Record private symbols; symbols recorded here must never
+ /// appear in the SymbolList.
+ std::vector<ELFSym*> PrivateSyms;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name, unsigned Type,
+ unsigned Flags = 0, unsigned Align = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+ SN = SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ SN->Link = ELFSection::SHN_UNDEF;
+ SN->Align = Align;
+ return *SN;
+ }
+
+ ELFSection &getNonExecStackSection() {
+ return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
+ }
+
+ ELFSection &getSymbolTableSection() {
+ return getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ }
+
+ ELFSection &getStringTableSection() {
+ return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getSectionHeaderStringTableSection() {
+ return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getNullSection() {
+ return getSection("", ELFSection::SHT_NULL, 0);
+ }
+
+ ELFSection &getDataSection();
+ ELFSection &getBSSSection();
+ ELFSection &getCtorSection();
+ ELFSection &getDtorSection();
+ ELFSection &getJumpTableSection();
+ ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+ ELFSection &getTextSection(Function *F);
+ ELFSection &getRelocSection(ELFSection &S);
+
+ // Helpers for obtaining ELF specific info.
+ unsigned getGlobalELFBinding(const GlobalValue *GV);
+ unsigned getGlobalELFType(const GlobalValue *GV);
+ unsigned getGlobalELFVisibility(const GlobalValue *GV);
+
+ // AddPendingGlobalSymbol - Add a global to be processed and, optionally,
+ // to the global symbol lookup; a zero index is used because the table
+ // index will be determined later.
+ void AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup = false);
+
+ // AddPendingExternalSymbol - Add the external to be processed and to
+ // the external symbol lookup; a zero index is used because the symbol
+ // table index will be determined later.
+ void AddPendingExternalSymbol(const char *External);
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is
+ // private, add it to the PrivateSyms list; otherwise add it to SymbolList.
+ void AddToSymbolList(ELFSym *GblSym);
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offsets in the ELF header of the various pieces to patch up, and of
+ // other locations in the file.
+ unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
+
+ private:
+ void EmitGlobal(const GlobalValue *GV);
+ void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+ void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS);
+ void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+ void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset = 0);
+ bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+ void EmitRelocations();
+ void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+ void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+ void EmitSectionTableStringTable();
+ void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+ void EmitSymbolTable();
+ void EmitStringTable(const std::string &ModuleName);
+ void OutputSectionsAndSectionTable();
+ void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+ unsigned Size);
+ unsigned SortSymbols();
+ CstExprResTy ResolveConstantExpr(const Constant *CV);
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
new file mode 100644
index 0000000..61959bb
--- /dev/null
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -0,0 +1,161 @@
+//===----- ExactHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a hazard recognizer using the instruction itineraries
+// defined for the current target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "ExactHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+using namespace llvm;
+
+ExactHazardRecognizer::
+ExactHazardRecognizer(const InstrItineraryData &LItinData) :
+ ScheduleHazardRecognizer(), ItinData(LItinData)
+{
+ // Determine the maximum depth of any itinerary. This determines the
+ // depth of the scoreboard. We always make the scoreboard at least 1
+ // cycle deep to avoid dealing with the boundary condition.
+ ScoreboardDepth = 1;
+ if (!ItinData.isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData.isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData.beginStage(idx);
+ const InstrStage *E = ItinData.endStage(idx);
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS)
+ ItinDepth += IS->getCycles();
+
+ ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
+ }
+ }
+
+ Scoreboard = new unsigned[ScoreboardDepth];
+ ScoreboardHead = 0;
+
+ DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
+ << ScoreboardDepth << '\n');
+}
+
+ExactHazardRecognizer::~ExactHazardRecognizer() {
+ delete [] Scoreboard;
+}
+
+void ExactHazardRecognizer::Reset() {
+ memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned));
+ ScoreboardHead = 0;
+}
+
+unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
+ return (ScoreboardHead + offset) % ScoreboardDepth;
+}
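+
+// Illustrative note (editor's sketch): the scoreboard is a circular buffer,
+// so with ScoreboardDepth == 4 and ScoreboardHead == 3, getFutureIndex(0)
+// returns 3 (the current cycle) and getFutureIndex(2) wraps around to
+// (3 + 2) % 4 == 1.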
+
+void ExactHazardRecognizer::dumpScoreboard() {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = ScoreboardDepth - 1;
+ while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = Scoreboard[getFutureIndex(i)];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+
+ExactHazardRecognizer::HazardType
+ExactHazardRecognizer::getHazardType(SUnit *SU) {
+ if (ItinData.isEmpty())
+ return NoHazard;
+
+ unsigned cycle = 0;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass();
+ for (const InstrStage *IS = ItinData.beginStage(idx),
+ *E = ItinData.endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+    // stage is occupied. FIXME: it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(SU->getInstr()->dump());
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (ItinData.isEmpty())
+ return;
+
+ unsigned cycle = 0;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass();
+ for (const InstrStage *IS = ItinData.beginStage(idx),
+ *E = ItinData.endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+    // stage is occupied. FIXME: it would be more accurate to reserve
+    // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+
+      // Reduce the free-unit mask to a single unit (the highest set bit).
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
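+      // Illustrative trace (editor's note): freeUnits & (freeUnits - 1)
+      // clears the lowest set bit, so for freeUnits == 0b0110 the loop runs
+      // twice and exits with freeUnit == 0b0100, which is then reserved in
+      // the scoreboard below.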
+
+ assert(freeUnit && "No function unit available!");
+ Scoreboard[index] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(dumpScoreboard());
+}
+
+void ExactHazardRecognizer::AdvanceCycle() {
+ Scoreboard[ScoreboardHead] = 0;
+ ScoreboardHead = getFutureIndex(1);
+}
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
new file mode 100644
index 0000000..71ac979
--- /dev/null
+++ b/lib/CodeGen/ExactHazardRecognizer.h
@@ -0,0 +1,61 @@
+//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ExactHazardRecognizer class, which
+// implements hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+namespace llvm {
+ class ExactHazardRecognizer : public ScheduleHazardRecognizer {
+ // Itinerary data for the target.
+ const InstrItineraryData &ItinData;
+
+ // Scoreboard to track function unit usage. Scoreboard[0] is a
+ // mask of the FUs in use in the cycle currently being
+  // scheduled. Scoreboard[1] is a mask for the next cycle. The
+ // Scoreboard is used as a circular buffer with the current cycle
+ // indicated by ScoreboardHead.
+ unsigned *Scoreboard;
+
+ // The maximum number of cycles monitored by the Scoreboard. This
+ // value is determined based on the target itineraries to ensure
+ // that all hazards can be tracked.
+ unsigned ScoreboardDepth;
+
+  // Index into the Scoreboard that represents the current cycle.
+ unsigned ScoreboardHead;
+
+ // Return the scoreboard index to use for 'offset' cycles in the
+ // future. 'offset' of 0 returns ScoreboardHead.
+ unsigned getFutureIndex(unsigned offset);
+
+ // Print the scoreboard.
+ void dumpScoreboard();
+
+ public:
+ ExactHazardRecognizer(const InstrItineraryData &ItinData);
+ ~ExactHazardRecognizer();
+
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..055172b
--- /dev/null
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,210 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ Printer() : FunctionPass(&ID), OS(errs()) {}
+ explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {}
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+static RegisterPass<GCModuleInfo>
+X("collector-metadata", "Create Garbage Collector Module Metadata");
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(&ID) {}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
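+
+// Usage sketch (editor's illustration; assumes the in-tree "shadow-stack"
+// collector is linked in): getOrCreateStrategy(M, "shadow-stack") returns
+// the cached strategy on a StrategyMap hit and otherwise scans the
+// GCRegistry, instantiating and caching a new GCStrategy instance.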
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
+
+bool Printer::runOnFunction(Function &F) {
+  // Only functions that opt into GC have roots and safe points to print;
+  // GCModuleInfo::getFunctionInfo also asserts F.hasGC().
+  if (F.hasGC()) {
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
+
+      // Print the live set; guard against an empty set so the end iterator
+      // is never dereferenced.
+      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+           RE = FD->live_end(PI); RI != RE;) {
+        OS << " " << RI->Num;
+        if (++RI != RE)
+          OS << ",";
+      }
+
+ OS << " }\n";
+ }
+ }
+
+ return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(&ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &F) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..9cd2925
--- /dev/null
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,30 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const MCAsmInfo &MAI) {
+ // Default is no action.
+}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..b5006fd
--- /dev/null
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,399 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ unsigned InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable(0);
+  return false;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(&ID) {}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I, IP);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
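+
+// Illustrative note (editor's sketch): for an entry block with
+// "%root = alloca i8*" registered via llvm.gcroot but not stored to before
+// the first potential safe point, the code above inserts the equivalent of
+// "store i8* null, i8** %root" so the collector never scans an
+// uninitialized slot.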
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a
+  // default action, or if roots require default initialization.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+  // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
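+
+// Illustrative note (editor's sketch): on a 32-bit target even
+// "%q = sdiv i64 %a, %b" typically lowers to a libcall such as __divdi3,
+// i.e. a call and therefore a potential safe point, which is why only the
+// whitelisted instruction kinds above return false.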
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ if (NeedsCustomLoweringPass(S))
+ MadeChange |= S.performCustomLowering(F);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*,32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getOperand(1)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+  if (!Roots.empty())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
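+
+// Illustrative note (editor's sketch of the default lowering):
+//   call void @llvm.gcwrite(i8* %v, i8* %obj, i8** %slot)
+// becomes "store i8* %v, i8** %slot", and
+//   %r = call i8* @llvm.gcread(i8* %obj, i8** %slot)
+// becomes "%r = load i8** %slot"; llvm.gcroot calls are kept so the backend
+// can still flag the stack slot.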
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(&ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ unsigned Label = MMI->NextLabelID();
+
+ BuildMI(MBB, MI, DL,
+ TII->get(TargetOpcode::GC_LABEL)).addImm(Label);
+
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall))
+ FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI,
+ CI->getDebugLoc()));
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall))
+ FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI,
+ CI->getDebugLoc()));
+}
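+
+// Illustrative note (editor's sketch): with both PreCall and PostCall safe
+// points requested, a call ends up bracketed as
+//   GC_LABEL <pre>; CALL ...; GC_LABEL <post>
+// and both label IDs are recorded against the GCFunctionInfo.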
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ assert(TRI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..c61fd17
--- /dev/null
+++ b/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1254 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+ /// if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+    /// IsBrAnalyzable - True if AnalyzeBranch() returns false, i.e. the
+    /// terminators of BB could be analyzed.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+    /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// Roots - Basic blocks that do not have successors. These are the starting
+ /// points of Graph traversal.
+ std::vector<MachineBasicBlock*> Roots;
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ bool AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+
+ bool MeetIfcvtSizeLimit(unsigned Size) const {
+ return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+        if (!C1->NeedSubsumption && C2->NeedSubsumption)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
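+
+    // Illustrative note (editor's sketch): tokens are popped from the back
+    // of the sorted vector, so after this descending sort a diamond token,
+    // whose Incr is negative, lands near the back and is attempted before
+    // simple or triangle tokens that duplicate the same number of
+    // instructions.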
+ };
+
+ char IfConverter::ID = 0;
+}
+
+static RegisterPass<IfConverter>
+X("if-converter", "If Converter");
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+  // Increment FnNum outside of DEBUG so the ifcvt-fn-start/ifcvt-fn-stop
+  // limits below keep working in builds where DEBUG compiles away.
+  ++FnNum;
+  DEBUG(dbgs() << "\nIfcvt: function (" << FnNum << ") \'"
+               << MF.getFunction()->getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ // Look for root nodes, i.e. blocks without successors.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ if (I->succ_empty())
+ Roots.push_back(I);
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) NumSimpleFalse++;
+ else NumSimple++;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) NumTriangleFRev++;
+ else NumTriangleFalse++;
+ } else {
+ if (isRev) NumTriangleRev++;
+ else NumTriangle++;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) NumDiamonds++;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ Roots.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange) {
+ BranchFolder BF(false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it instead checks whether the 'true' block's
+/// false branch branches to the 'false' block rather than the other way
+/// around. It also returns, in 'Dups', the number of instructions that the
+/// ifcvt would need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = BB->end();
+ while (I != BB->begin()) {
+ --I;
+ if (!I->getDesc().isBranch())
+ break;
+ }
+ return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups1;
+ ++TI;
+ ++FI;
+ }
+
+ TI = firstNonBranchInst(TrueBBI.BB, TII);
+ FI = firstNonBranchInst(FalseBBI.BB, TII);
+ while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups2;
+ --TI;
+ --FI;
+ }
+
+ return true;
+}
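+
+// Illustrative note (editor's sketch): if both paths start with the same
+// compare and end with the same add, the loops above report Dups1 == 1 and
+// Dups2 == 1, letting the diamond conversion handle the shared prefix and
+// suffix once instead of predicating them on both paths.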
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated)
+ BBI.NonPredSize++;
+ else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // Predicate modification instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+    // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable or ends with fallthrough or unconditional branch.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+  // Do not ifcvt if either path is a back edge to BB itself, i.e. the entry
+  // block of this sub-CFG.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ bool Change = false;
+ std::set<MachineBasicBlock*> Visited;
+ for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+ for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+ E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+ AnalyzeBlock(BB, Tokens);
+ }
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+ return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (++I != TI)
+ if (I == E || !I->empty())
+ return false;
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, that will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool DupBB = CvtBBI->BB->pred_size() > 1;
+ if (DupBB) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+  // Update block info. If BB cannot be iteratively if-converted, mark it done.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
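+/// A rough sketch of the sub-CFG shape being converted, where the entry
+/// block EBB conditionally branches to TBB and FBB, which rejoin at the
+/// tail block:
+///
+///        EBB
+///       /   \_
+///     TBB   FBB
+///       \   /
+///      TailBB
+///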
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+  // Merge the 'true' and 'false' blocks by copying the instructions
+  // from the 'false' block to the 'true' block. That is, unless the true
+  // block would clobber the predicate, in which case do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+ while (NumDups1 != 0) {
+ ++DI1;
+ ++DI2;
+ --NumDups1;
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ++i)
+ --DI1;
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ --DI2;
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1);
+ MergeBlocks(BBI, *BBI2);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+ if (TailBB) {
+    // Take a reference so bookkeeping updates to the tail's BBInfo persist.
+    BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ bool isPredicated = TII->isPredicated(I);
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && !isPredicated && TID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+
+ if (!isPredicated)
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ // Redirect all branches to FromBB to ToBB.
+ std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+ FromBBI.BB->pred_end());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ MachineBasicBlock *Pred = Preds[i];
+ if (Pred == ToBBI.BB)
+ continue;
+ Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ FromBBI.NonPredSize = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..9997a48
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,528 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
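+/// EnsureFunctionExists - Make sure the module contains a declaration for an
+/// external function with the given name, typed from the supplied argument
+/// range and return type; e.g. for the float case below this declares a
+/// "sinf" taking and returning float.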
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
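+///
+/// A usage sketch based on the memcpy lowering below: with the three
+/// pointer/size operands collected in Ops,
+///   ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType())
+/// emits a call to an external "memcpy" and replaces all uses of the
+/// intrinsic call with it.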
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+  // Check to see if the program already contains a function with this name;
+  // getOrInsertFunction reuses a matching prototype and inserts a
+  // correctly-typed declaration otherwise.
+  Module *M = CI->getParent()->getParent()->getParent();
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
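+///
+/// The expansion shifts each byte into its mirrored position, masks off the
+/// rest, and ORs the pieces together; e.g. for i32,
+/// bswap(0x12345678) == 0x78563412.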
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
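+///
+/// This uses the classic parallel bit-count: at each step i = 1,2,4,... the
+/// value is treated as adjacent i-bit fields which are summed pairwise, i.e.
+/// x = (x & mask) + ((x >> i) & mask). For example, an i8 ctpop(0xF0)
+/// proceeds 0xF0 -> 0xA0 -> 0x40 -> 0x04 = 4 set bits. Values wider than
+/// 64 bits are processed one 64-bit word at a time.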
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
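+///
+/// The or-with-shift loop smears the highest set bit into every lower
+/// position, so the leading zeros become the only zero bits; ctlz is then
+/// ctpop of the complement. e.g. for i8 0b00010000 the smear gives
+/// 0b00011111, and ctpop(0b11100000) = 3 leading zeros.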
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ switch (CI->getOperand(1)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
+ CI->getOperand(1)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ llvm_report_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ llvm_report_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+  // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
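+    // e.g. x = 0b01011000: x-1 = 0b01010111, ~x & (x-1) = 0b00000111, and
+    // ctpop of that is 3, the number of trailing zeros.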
+ Value *Src = CI->getOperand(1);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for:
+    // Return something different from eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..40e0150
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,381 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace llvm {
+ bool EnableFastISel;
+}
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ default:
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+}
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+// Enable or disable an experimental optimization to split GEPs
+// and run a special GVN pass which does not examine loads, in
+// an effort to factor out redundancy implicit in complex GEPs.
+static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
+ cl::desc("Split GEPs and run no-load GVN"));
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ const std::string &TargetTriple)
+ : TargetMachine(T) {
+ AsmInfo = T.createAsmInfo(TargetTriple);
+}
+
+// Set the default code model for the JIT for a generic target.
+// FIXME: Is small right here? or .is64Bit() ? Large : Small?
+void
+LLVMTargetMachine::setCodeModelForJIT() {
+ setCodeModel(CodeModel::Small);
+}
+
+// Set the default code model for static compilation for a generic target.
+void
+LLVMTargetMachine::setCodeModelForStatic() {
+ setCodeModel(CodeModel::Small);
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ OwningPtr<MCContext> Context(new MCContext());
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ formatted_raw_ostream *LegacyOutput;
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, Out);
+ AsmStreamer.reset(createAsmStreamer(*Context, Out, MAI,
+ getTargetData()->isLittleEndian(),
+ getVerboseAsm(), InstPrinter,
+ /*codeemitter*/0));
+ // Set the AsmPrinter's "O" to the output file.
+ LegacyOutput = &Out;
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this);
+ if (MCE == 0)
+ return true;
+
+ AsmStreamer.reset(createMachOStreamer(*Context, Out, MCE));
+
+ // Any output to the asmprinter's "O" stream is bad and needs to be fixed,
+ // force it to come out stderr.
+ // FIXME: this is horrible and leaks, eventually remove the raw_ostream from
+ // asmprinter.
+ LegacyOutput = new formatted_raw_ostream(errs());
+ break;
+ }
+ case CGFT_Null:
+    // The Null output is intended for performance analysis and testing, not
+    // for real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ // Any output to the asmprinter's "O" stream is bad and needs to be fixed,
+ // force it to come out stderr.
+ // FIXME: this is horrible and leaks, eventually remove the raw_ostream from
+ // asmprinter.
+ LegacyOutput = new formatted_raw_ostream(errs());
+ break;
+ }
+
+ // Create the AsmPrinter, which takes ownership of Context and AsmStreamer
+ // if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*LegacyOutput, *this, *Context, *AsmStreamer,
+ getMCAsmInfo());
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer and Context.
+ Context.take(); AsmStreamer.take();
+
+ PM.add(Printer);
+
+ // Make sure the code model is set.
+ setCodeModelForStatic();
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ CodeGenOpt::Level OptLevel) {
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ addCodeEmitter(PM, OptLevel, JCE);
+ PM.add(createGCInfoDeleter());
+
+ return false; // success!
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ const char *Banner,
+ bool allowDoubleDefs = false) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(allowDoubleDefs));
+}
+
+/// addCommonCodeGenPasses - Add the standard LLVM codegen passes used both
+/// for emitting assembly files and for machine code output.
+///
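+/// Roughly, the pipeline below is: IR-level preparation (LSR, EH lowering,
+/// GC lowering, CodeGenPrepare) -> instruction selection -> machine-code
+/// cleanup and optimization -> register allocation -> prolog/epilog
+/// insertion -> post-RA scheduling -> branch folding, tail duplication and
+/// code placement -> target pre-emit passes.
+///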
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Standard LLVM-Level Passes.
+
+  // Optionally, run split-GEPs and no-load GVN.
+ if (EnableSplitGEPGVN) {
+ PM.add(createGEPSplitterPass());
+ PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
+ }
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+  switch (getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit, so the cleanups done here
+    // apply to both. Dwarf EH prepare needs to be run after SjLj prepare;
+    // otherwise, catch info can get misplaced when a selector ends up more
+    // than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ break;
+ case ExceptionHandling::Dwarf:
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ break;
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // Standard Lower-Level Passes.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection",
+ /* allowDoubleDefs= */ true);
+
+ // Delete dead machine instructions regardless of optimization level.
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass",
+ /* allowDoubleDefs= */ true);
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createOptimizeExtsPass());
+ if (!DisableMachineLICM)
+ PM.add(createMachineLICMPass());
+ if (!DisableMachineSink)
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After MachineLICM and MachineSinking",
+ /* allowDoubleDefs= */ true);
+ }
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
+ /* allowDoubleDefs= */ true);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes",
+ /* allowDoubleDefs= */ true);
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring.
+ if (OptLevel != CodeGenOpt::None && !DisableSSC) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+ printAndVerify(PM, "After StackSlotColoring");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createLowerSubregsPass());
+ printAndVerify(PM, "After LowerSubregs");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printAndVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
+ PM.add(createTailDuplicatePass(false));
+ printAndVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
+ PM.add(createCodePlacementOptPass());
+ printAndVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printAndVerify(PM, "After PreEmit passes");
+
+ return false;
+}
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 0000000..f1bd573
--- /dev/null
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,116 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
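+// A note on ordering: the queue behind this comparator is a
+// std::priority_queue (see the header), so returning true here means LHS
+// ranks *below* RHS. The net effect is that the node with the larger
+// remaining critical-path latency is popped first.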
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+  // After that, if two nodes have identical latencies, look to see if one
+  // will unblock more nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of nodes
+  // that this node is the sole unscheduled predecessor for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..e207f60
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,882 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each the machine instructions an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(SlotIndex I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ return r->contains(I);
+}
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If index is liveAt, check if it starts a new live range.
+// If it does, then check if the previous live range ends at index-1.
+bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ if (!r->contains(I))
+ return false;
+ if (I != r->start)
+ return true;
+ // I is the start of a live range. Check if the previous live range ends
+ // at I-1.
+ if (r == ranges.begin())
+ return false;
+ return r->end == I;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = begin();
+ const_iterator E = end();
+ const_iterator si = std::upper_bound(I, E, Start);
+ const_iterator ei = std::upper_bound(I, E, End);
+ if (si != ei)
+ return true;
+ if (si == I)
+ return false;
+ --si;
+ return si->contains(Start);
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+ SlotIndex OldEnd = I->end;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(next(I), MergeTo);
+
+ // Update kill info.
+ ValNo->removeKills(OldEnd, I->end.getPrevSlot());
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(next(MergeTo), next(I));
+ return MergeTo;
+}
+
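+/// addRangeFrom - Add the live range LR to this interval, coalescing it with
+/// adjacent or overlapping ranges that carry the same value number. As a
+/// small worked example, adding [4,8):v0 to {[1,4):v0, [10,12):v1} yields
+/// {[1,8):v0, [10,12):v1}.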
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ SlotIndex Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ else if (End < it->end)
+ // Overlapping intervals, there might have been a kill here.
+ it->valno->removeKill(End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+/// isInOneLiveRange - Return true if the range specified is entirely in
+/// a single LiveRange of the live interval.
+bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ if (I == ranges.begin())
+ return false;
+ --I;
+ return I->containsRange(Start, End);
+}
+
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
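+/// Removing an interior span splits the LiveRange in two: e.g. removing
+/// [5,7) from [2,10) leaves [2,5) and [7,10).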
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ assert(I != ranges.begin() && "Range is not in interval!");
+ --I;
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ ValNo->removeKills(Start, End);
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ ValNo->removeKills(Start, End);
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from the value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+}
+
+/// FindLiveRangeContaining - Return an iterator to the live range that
+/// contains the specified index, or end() if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
+ const_iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != ranges.begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
+ iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+/// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->def == Idx)
+ return *i;
+ }
+
+ return 0;
+}
+
+/// findDefinedVNInfoForStackInt - Find the VNInfo defined by the specified
+/// register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->getReg() == reg)
+ return *i;
+ }
+ return 0;
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
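+///
+/// For example, joining an LHS of [0,4:0) with an RHS of [4,8:0), with
+/// both val#0's mapped to the same merged value number, leaves the single
+/// coalesced range [0,8:0).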
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+ // Remember assignments because val# ids are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ ComputeJoinedWeight(Other);
+
+ // Update regalloc hint if currently there isn't one.
+ if (TargetRegisterInfo::isVirtualRegister(reg) &&
+ TargetRegisterInfo::isVirtualRegister(Other.reg)) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg);
+ if (Hint.first == 0 && Hint.second == 0) {
+ std::pair<unsigned, unsigned> OtherHint =
+ MRI->getRegAllocationHint(Other.reg);
+ if (OtherHint.first || OtherHint.second)
+ MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second);
+ }
+ }
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of the overlapped live ranges are
+/// replaced with the specified value number.
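+///
+/// For example, merging the RHS range [2,6:R) into an interval that holds
+/// [4,8:V) retags [4,8) with the merged value number and prepends [2,4),
+/// leaving the single range [2,8) and marking V dead if nothing else
+/// refers to it.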
+void LiveInterval::MergeValueInAsValue(
+ const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ SmallVector<VNInfo*, 4> ReplacedValNos;
+ iterator IP = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ if (I->valno != RHSValNo)
+ continue;
+ SlotIndex Start = I->start, End = I->end;
+ IP = std::upper_bound(IP, end(), Start);
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ if (IP[-1].valno != LHSValNo) {
+ ReplacedValNos.push_back(IP[-1].valno);
+ IP[-1].valno = LHSValNo; // Update val#.
+ }
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ if (IP->valno != LHSValNo) {
+ ReplacedValNos.push_back(IP->valno);
+ IP->valno = LHSValNo; // Update val#.
+ }
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) continue;
+ }
+
+ // Map the valno in the other live range to the current live range.
+ IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+ }
+
+
+ SmallSet<VNInfo*, 4> Seen;
+ for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+ VNInfo *V1 = ReplacedValNos[i];
+ if (Seen.insert(V1)) {
+ bool isDead = true;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (I->valno == V1) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that V1 is dead, remove it. If it is the largest value number,
+ // just nuke it (and any other deleted values neighboring it), otherwise
+ // mark it as unused so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ V1->setIsUnused(true);
+ }
+ }
+ }
+ }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
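+///
+/// For example, merging the clobber range [0,10) into an interval holding
+/// [2,4) and [6,8) inserts the pieces [0,2), [4,6) and [8,10), all tagged
+/// with a single "unknown" value number.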
+void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
+ const LiveInterval &Clobbers,
+ BumpPtrAllocator &VNInfoAllocator) {
+ if (Clobbers.empty()) return;
+
+ DenseMap<VNInfo*, VNInfo*> ValNoMaps;
+ VNInfo *UnusedValNo = 0;
+ iterator IP = begin();
+ for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+ // For every val# in the Clobbers interval, create a new "unknown" val#.
+ VNInfo *ClobberValNo = 0;
+ DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
+ if (VI != ValNoMaps.end())
+ ClobberValNo = VI->second;
+ else if (UnusedValNo)
+ ClobberValNo = UnusedValNo;
+ else {
+ UnusedValNo = ClobberValNo =
+ getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
+ ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
+ }
+
+ bool Done = false;
+ SlotIndex Start = I->start, End = I->end;
+ // If a clobber range starts before an existing range and ends after
+ // it, the clobber range will need to be split into multiple ranges.
+ // Loop until the entire clobber range is handled.
+ while (!Done) {
+ Done = true;
+ IP = std::upper_bound(IP, end(), Start);
+ SlotIndex SubRangeStart = Start;
+ SlotIndex SubRangeEnd = End;
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > SubRangeStart) {
+ SubRangeStart = IP[-1].end;
+ // Trimmed away the whole range?
+ if (SubRangeStart >= SubRangeEnd) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && SubRangeEnd > IP->start) {
+ // If the clobber live range extends beyond the existing live range,
+ // it'll need at least another live range, so set the flag to keep
+ // iterating.
+ if (SubRangeEnd > IP->end) {
+ Start = IP->end;
+ Done = false;
+ }
+ SubRangeEnd = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (SubRangeStart == SubRangeEnd) continue;
+ }
+
+ // Insert the clobber interval.
+ IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+ IP);
+ UnusedValNo = 0;
+ }
+ }
+
+ if (UnusedValNo) {
+ // Delete the last unused val#.
+ valnos.pop_back();
+ UnusedValNo->~VNInfo();
+ }
+}
+
+void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
+ SlotIndex Start,
+ SlotIndex End,
+ BumpPtrAllocator &VNInfoAllocator) {
+ // Find a value # to use for the clobber ranges. If there is already a value#
+ // for unknown values, use it.
+ VNInfo *ClobberValNo =
+ getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
+
+ iterator IP = begin();
+ IP = std::upper_bound(IP, end(), Start);
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) return;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) return;
+ }
+
+ // Insert the clobber interval.
+ addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
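+///
+/// For example, given the ranges [0,4:V1)[4,8:V2), merging V1 into V2
+/// collapses the interval to a single range [0,8) owned by the surviving
+/// value number, and V1's entry in the value number list is reclaimed.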
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it. If it is the largest value number, just
+ // nuke it (and any other deleted values neighboring it), otherwise mark it
+ // as unused so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ V1->setIsUnused(true);
+ }
+
+ return V2;
+}
+
+void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
+ BumpPtrAllocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ createValueCopy(VNI, VNInfoAllocator);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+ // If either of these intervals was spilled, the weight is the
+ // weight of the non-spilled interval. This can only happen with
+ // iterative coalescers.
+
+ if (Other.weight != HUGE_VALF) {
+ weight += Other.weight;
+ }
+ else if (weight == HUGE_VALF &&
+ !TargetRegisterInfo::isPhysicalRegister(reg)) {
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining to spilled interval");
+ weight = Other.weight;
+ }
+ else {
+ // Otherwise the weight stays the same
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining from spilled interval");
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (isStackSlot())
+ OS << "SS#" << getStackSlotIndex();
+ else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
+ OS << TRI->getName(reg);
+ else
+ OS << "%reg" << reg;
+
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I)
+ OS << *I;
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ if (!vni->isDefAccurate() && !vni->isPHIDef())
+ OS << "?";
+ else
+ OS << vni->def;
+ unsigned ee = vni->kills.size();
+ if (ee || vni->hasPHIKill()) {
+ OS << "-(";
+ for (unsigned j = 0; j != ee; ++j) {
+ OS << vni->kills[j];
+ if (j != ee-1)
+ OS << " ";
+ }
+ if (vni->hasPHIKill()) {
+ if (ee)
+ OS << " ";
+ OS << "phi";
+ }
+ OS << ")";
+ }
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..432409a
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2104 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EnableFastSpilling("fast-spill",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals , "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+
+ if (!StrongPHIElim) {
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ }
+
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ AU.addPreserved<ProcessImplicitDefs>();
+ AU.addRequired<ProcessImplicitDefs>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+ E = r2iMap_.end(); I != E; ++I)
+ delete I->second;
+
+ r2iMap_.clear();
+
+ // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+ VNInfoAllocator.Reset();
+ while (!CloneMIs.empty()) {
+ MachineInstr *MI = CloneMIs.back();
+ CloneMIs.pop_back();
+ mf_->DeleteMachineInstr(MI);
+ }
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &mf_->getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ aa_ = &getAnalysis<AliasAnalysis>();
+ lv_ = &getAnalysis<LiveVariables>();
+ indexes_ = &getAnalysis<SlotIndexes>();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the virtual print method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second->print(OS, tri_);
+ OS << "\n";
+ }
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ OS << "BB#" << mbbi->getNumber()
+ << ":\t\t# derived from " << mbbi->getName() << "\n";
+ for (MachineBasicBlock::iterator mii = mbbi->begin(),
+ mie = mbbi->end(); mii != mie; ++mii) {
+ if (mii->isDebugValue())
+ OS << SlotIndex::getEmptyKey() << '\t' << *mii;
+ else
+ OS << getInstructionIndex(mii) << '\t' << *mii;
+ }
+ }
+}
+
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ // We don't handle fancy stuff crossing basic block boundaries
+ if (li.ranges.size() != 1)
+ return true;
+ const LiveRange &range = li.ranges.front();
+ SlotIndex idx = range.start.getBaseIndex();
+ SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // Skip deleted instructions
+ MachineInstr *firstMI = getInstructionFromIndex(idx);
+ while (!firstMI && idx != end) {
+ idx = idx.getNextIndex();
+ firstMI = getInstructionFromIndex(idx);
+ }
+ if (!firstMI)
+ return false;
+
+ // Find last instruction in range
+ SlotIndex lastIdx = end.getPrevIndex();
+ MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+ while (!lastMI && lastIdx != idx) {
+ lastIdx = lastIdx.getPrevIndex();
+ lastMI = getInstructionFromIndex(lastIdx);
+ }
+ if (!lastMI)
+ return false;
+
+ // Range cannot cross basic block boundaries or terminators
+ MachineBasicBlock *MBB = firstMI->getParent();
+ if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+ return true;
+
+ MachineBasicBlock::const_iterator E = lastMI;
+ ++E;
+ for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Allow copies to and from li.reg
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (SrcReg == li.reg || DstReg == li.reg)
+ continue;
+
+ // Check for operands using reg
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& mop = MI.getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
+ continue;
+ PhysReg = vrm.getPhys(PhysReg);
+ }
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
+ }
+ }
+
+ // No conflicts found.
+ return false;
+}
+
+/// conflictsWithPhysRegRef - Similar to conflictsWithPhysReg except
+/// it can check uses as well.
+bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+ unsigned Reg, bool CheckUse,
+ SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (SlotIndex index = I->start.getBaseIndex(),
+ end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+ index != end;
+ index = index.getNextIndex()) {
+ MachineInstr *MI = getInstructionFromIndex(index);
+ if (!MI)
+ continue; // skip deleted instructions
+
+ if (JoinedCopies.count(MI))
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse() && !CheckUse)
+ continue;
+ unsigned PhysReg = MO.getReg();
+ if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+ continue;
+ if (tri_->isSubRegister(Reg, PhysReg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+#ifndef NDEBUG
+static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ dbgs() << tri_->getName(reg);
+ else
+ dbgs() << "%reg" << reg;
+}
+#endif
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval &interval) {
+ DEBUG({
+ dbgs() << "\t\tregister: ";
+ printRegName(interval.reg, tri_);
+ });
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+ if (interval.empty()) {
+ // Get the Idx of the defining instructions.
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ // Earlyclobbers move back one, so that they overlap the live range
+ // of inputs.
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ // Earlyclobbers move back one.
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ SlotIndex killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ else
+ killIdx = defIndex.getStoreIndex();
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << "\n");
+ ValNo->addKill(killIdx);
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
+ DEBUG(dbgs() << " +" << NewLR);
+ interval.addRange(NewLR);
+
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ SlotIndex killIdx =
+ getInstructionIndex(Kill).getDefIndex();
+ LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
+ interval.addRange(LR);
+ ValNo->addKill(killIdx);
+ DEBUG(dbgs() << " +" << LR);
+ }
+
+ } else {
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+ // def-and-use register operands.
+ if (mi->isRegTiedToUseOperand(MOIdx)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+ // need to take the LiveRange that defines this register and split it
+ // into two values.
+ assert(interval.containsOneValue());
+ SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ RedefIndex = MIIdx.getUseIndex();
+
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ VNInfo *OldValNo = OldLR->valno;
+
+ // Delete the initial value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+ // Two-address vregs should always only be redefined once. This means
+ // that at this point, there should be exactly one value number in it.
+ assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
+ false, // update at *
+ VNInfoAllocator);
+ ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+
+ // Value#0 is now defined by the 2-addr instruction.
+ OldValNo->def = RedefIndex;
+ OldValNo->setCopy(0);
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DEBUG(dbgs() << " replace range with " << LR);
+ interval.addRange(LR);
+ ValNo->addKill(RedefIndex);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (MO.isDead())
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ OldValNo));
+
+ DEBUG({
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
+ });
+ } else {
+ // Otherwise, this must be because of phi elimination. If this is the
+ // first redefinition of the vreg that we have seen, go back and change
+ // the live range in the PHI block to be a different value number.
+ if (interval.containsOneValue()) {
+
+ VNInfo *VNI = interval.getValNumInfo(0);
+ // Phi elimination may have reused the register for multiple identical
+ // phi nodes. There will be a kill per phi. Remove the old ranges that
+ // we now know have an incorrect number.
+ for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) {
+ MachineInstr *Killer = vi.Kills[ki];
+ SlotIndex Start = getMBBStartIdx(Killer->getParent());
+ SlotIndex End = getInstructionIndex(Killer).getDefIndex();
+ DEBUG({
+ dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
+ interval.print(dbgs(), tri_);
+ });
+ interval.removeRange(Start, End);
+
+ // Replace the interval with one of a NEW value number. Note that
+ // this value number isn't actually defined by an instruction, weird
+ // huh? :)
+ LiveRange LR(Start, End,
+ interval.getNextValue(SlotIndex(Start, true),
+ 0, false, VNInfoAllocator));
+ LR.valno->setIsPHIDef(true);
+ interval.addRange(LR);
+ LR.valno->addKill(End);
+ }
+
+ MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def);
+ VNI->addKill(indexes_->getTerminatorGap(killMBB));
+ VNI->setHasPHIKill(true);
+ DEBUG({
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
+ });
+ }
+
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+
+ SlotIndex killIndex = getMBBEndIdx(mbb);
+ LiveRange LR(defIndex, killIndex, ValNo);
+ interval.addRange(LR);
+ ValNo->addKill(indexes_->getTerminatorGap(mbb));
+ ValNo->setHasPHIKill(true);
+ DEBUG(dbgs() << " +" << LR);
+ }
+ }
+
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ LiveInterval &interval,
+ MachineInstr *CopyMI) {
+ // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DEBUG({
+ dbgs() << "\t\tregister: ";
+ printRegName(interval.reg, tri_);
+ });
+
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex.getDefIndex();
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ start = MIIdx.getUseIndex();
+ SlotIndex end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ // For earlyclobbers, the defSlot was pushed back one; the extra
+ // advance below compensates.
+ if (MO.isDead()) {
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex = baseIndex.getNextIndex();
+ while (++mi != MBB->end()) {
+
+ if (mi->isDebugValue())
+ continue;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = baseIndex.getDefIndex();
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that
+ // defines it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex = baseIndex.getNextIndex();
+ }
+
+ // The only case where we should have a dead physreg here without a kill
+ // or any other instruction where we know it's dead is if it is live-in
+ // to the function and never used. Another possible case is that the
+ // implicit use of the physical register has been deleted by the
+ // two-address pass.
+ end = start.getStoreIndex();
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+ bool Extend = OldLR != interval.end();
+ VNInfo *ValNo = Extend
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
+ if (MO.isEarlyClobber() && Extend)
+ ValNo->setHasRedefByEC(true);
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ LR.valno->addKill(end);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else if (allocatableRegs_[MO.getReg()]) {
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() ||
+ tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ // Def of a register also defines its sub-registers.
+ for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
+ // If MI also modifies the sub-register explicitly, avoid processing it
+ // more than once. Do not pass in TRI here so it checks for exact match.
+ if (!MI->modifiesRegister(*AS))
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(*AS), 0);
+ }
+}
+
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ SlotIndex MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DEBUG({
+ dbgs() << "\t\tlivein register: ";
+ printRegName(interval.reg, tri_);
+ });
+
+ // Look for kills; if the register reaches a def before it's killed, then
+ // it shouldn't be considered a livein.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ SlotIndex end = baseIndex;
+ bool SeenDefUse = false;
+
+ MachineBasicBlock::iterator E = MBB->end();
+ while (mi != E) {
+ if (mi->isDebugValue()) {
+ ++mi;
+ continue;
+ }
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ SeenDefUse = true;
+ break;
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ SeenDefUse = true;
+ break;
+ }
+
+ ++mi;
+ if (mi != E && !mi->isDebugValue()) {
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+ }
+ }
+
+ // Live-in register might not be used at all.
+ if (!SeenDefUse) {
+ if (isAlias) {
+ DEBUG(dbgs() << " dead");
+ end = MIIdx.getStoreIndex();
+ } else {
+ DEBUG(dbgs() << " live through");
+ end = baseIndex;
+ }
+ }
+
+ VNInfo *vni =
+ interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
+ 0, false, VNInfoAllocator);
+ vni->setIsPHIDef(true);
+ LiveRange LR(start, end, vni);
+
+ interval.addRange(LR);
+ LR.valno->addKill(end);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+/// computeIntervals - Compute the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j), where 1 <= i <= j < N, in
+/// which a variable is live.
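+///
+/// For example, in the slot notation used throughout this file, a register
+/// defined by instruction d and killed by instruction k is live in
+/// [defSlot(d), useSlot(k)+1).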
+void LiveIntervals::computeIntervals() {
+ DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ SmallVector<unsigned, 8> UndefUses;
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (MBB->empty())
+ continue;
+
+ // Track the index of the current machine instr.
+ SlotIndex MIIndex = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+
+ // Skip over empty initial indices.
+ if (getInstructionFromIndex(MIIndex) == 0)
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+
+ for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+ MI != miEnd; ++MI) {
+ DEBUG(dbgs() << MIIndex << "\t" << *MI);
+ if (MI->isDebugValue())
+ continue;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ // handle register defs - build intervals
+ if (MO.isDef())
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ else if (MO.isUndef())
+ UndefUses.push_back(MO.getReg());
+ }
+
+ // Move to the next instr slot.
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ }
+ }
+
+ // Create empty intervals for registers defined by implicit_def's (except
+ // for those implicit_defs that define values which are liveout of their
+ // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+ unsigned UndefReg = UndefUses[i];
+ (void)getOrCreateInterval(UndefReg);
+ }
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
+ return NewLI;
+}
+
+/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
+/// copy field and returns the source register that defines it.
+unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
+ if (!VNI->getCopy())
+ return 0;
+
+ if (VNI->getCopy()->isExtractSubreg()) {
+ // If it's extracting out of a physical register, return the sub-register.
+ unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
+ unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
+ if (SrcSubReg == DstSubReg)
+ // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3
+ // reg1034 can still be coalesced to EDX.
+ return Reg;
+ assert(DstSubReg == 0);
+ Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
+ }
+ return Reg;
+ } else if (VNI->getCopy()->isInsertSubreg() ||
+ VNI->getCopy()->isSubregToReg())
+ return VNI->getCopy()->getOperand(2).getReg();
+
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return SrcReg;
+ llvm_unreachable("Unrecognized copy instruction!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !allocatableRegs_[Reg])
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ SlotIndex UseIdx) const {
+ SlotIndex Index = getInstructionIndex(MI);
+ VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+ LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+ return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (!tii_->isTriviallyReMaterializable(MI, aa_))
+ return false;
+
+ // Target-specific code can mark an instruction as being rematerializable
+ // if it has one virtual reg use, though it had better be something like
+ // a PIC base register which is likely to be live everywhere.
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
+ re = mri_->use_end(); ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ SlotIndex UseIdx = getInstructionIndex(UseMI);
+ if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+ if (ImpUse == SpillIs[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ SmallVector<LiveInterval*, 4> Dummy1;
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if the definition of every val# of
+/// the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ if (!VNI->isDefAccurate())
+ return false;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into the ith operand of the
+/// specified MI. If it is successful, MI is updated with the newly created
+/// MI and this returns true.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ SlotIndex InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->isImplicitDef()) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two address instruction is when
+ // it's folding reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ MachineBasicBlock &MBB = *MI->getParent();
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ ReplaceMachineInstrInMaps(MI, fmi);
+ MI = MBB.insert(MBB.erase(MI), fmi);
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+ MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb == 0)
+ return false;
+
+ for (++itr; itr != li.ranges.end(); ++itr) {
+ MachineBasicBlock *mbb2 =
+ indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb2 != mbb)
+ return false;
+ }
+
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// the interval's register inside the re-materialized definitions of MI's
+/// operands) to use the new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+ // There is an implicit use. That means one of the other operands is
+ // being remat'ed and the remat'ed instruction has li.reg as a
+ // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!vrm.isReMaterialized(Reg))
+ continue;
+ MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+ MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+ if (UseMO)
+ UseMO->setReg(NewVReg);
+ }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+ bool TrySplit, SlotIndex index, SlotIndex end,
+ MachineInstr *MI,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool CanFold = false;
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned Reg = mop.getReg();
+ unsigned RegI = Reg;
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (Reg != li.reg)
+ continue;
+
+ bool TryFold = !DefIsReMat;
+ bool FoldSS = true; // Default behavior unless it's a remat.
+ int FoldSlot = Slot;
+ if (DefIsReMat) {
+ // If this is the rematerializable definition MI itself and
+ // all of its uses are rematerialized, simply delete it.
+ if (MI == ReMatOrigDefMI && CanDelete) {
+ DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
+ << MI << '\n');
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // If def for this use can't be rematerialized, then try folding.
+ // If def is rematerializable and it's a load, also try folding.
+ TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+ if (isLoad) {
+ // Try fold loads (from stack slot, constant pool, etc.) into uses.
+ FoldSS = isLoadSS;
+ FoldSlot = LdSlot;
+ }
+ }
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+
+ HasUse = mop.isUse();
+ HasDef = mop.isDef();
+ SmallVector<unsigned, 2> Ops;
+ Ops.push_back(i);
+ for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+ const MachineOperand &MOj = MI->getOperand(j);
+ if (!MOj.isReg())
+ continue;
+ unsigned RegJ = MOj.getReg();
+ if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
+ continue;
+ if (RegJ == RegI) {
+ Ops.push_back(j);
+ if (!MOj.isUndef()) {
+ HasUse |= MOj.isUse();
+ HasDef |= MOj.isDef();
+ }
+ }
+ }
+
+ // Create a new virtual register for the spill interval.
+ // Create the new register now so we can map the fold instruction
+ // to the new register so when it is unfolded we get the correct
+ // answer.
+ bool CreatedNewVReg = false;
+ if (NewVReg == 0) {
+ NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ CreatedNewVReg = true;
+
+ // The new virtual register should get the same allocation hints as the
+ // old one.
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+ if (Hint.first || Hint.second)
+ mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
+ }
+
+ if (!TryFold)
+ CanFold = false;
+ else {
+ // Do not fold load / store here if we are splitting. We'll find an
+ // optimal point to insert a load / store later.
+ if (!TrySplit) {
+ if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, FoldSS, FoldSlot, NewVReg)) {
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+
+ if (FoldSS) {
+ // We need to give the new vreg the same stack slot as the
+ // spilled interval.
+ vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+ }
+
+ HasUse = false;
+ HasDef = false;
+ CanFold = false;
+ if (isNotInMIMap(MI))
+ break;
+ goto RestartInstruction;
+ }
+ } else {
+ // We'll try to fold it later if it's profitable.
+ CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+ }
+ }
+
+ mop.setReg(NewVReg);
+ if (mop.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+ // Reuse NewVReg for other reads.
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+ MachineOperand &mopj = MI->getOperand(Ops[j]);
+ mopj.setReg(NewVReg);
+ if (mopj.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+ }
+
+ if (CreatedNewVReg) {
+ if (DefIsReMat) {
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
+ if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+ // Each valnum may have its own remat id.
+ ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+ } else {
+ vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+ }
+ if (!CanDelete || (HasUse && HasDef)) {
+ // If this is a two-addr instruction then its use operands are
+ // rematerializable but its def is not. It should be assigned a
+ // stack slot.
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else {
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else if (HasUse && HasDef &&
+ vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+ // If this interval hasn't been assigned a stack slot (because earlier
+ // def is a deleted remat def), do it now.
+ assert(Slot != VirtRegMap::NO_STACK_SLOT);
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI.
+ if (DefIsReMat && ImpUse)
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+ // Create a new register interval for this spill / remat.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (CreatedNewVReg) {
+ NewLIs.push_back(&nI);
+ MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+ if (TrySplit)
+ vrm.setIsSplitFromReg(NewVReg, li.reg);
+ }
+
+ if (HasUse) {
+ if (CreatedNewVReg) {
+ LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
+ nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ } else {
+ // Extend the split live interval to this def / use.
+ SlotIndex End = index.getDefIndex();
+ LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+ nI.getValNumInfo(nI.getNumValNums()-1));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+ }
+ if (HasDef) {
+ LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+
+ DEBUG({
+ dbgs() << "\t\t\t\tAdded new interval: ";
+ nI.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+ }
+ return CanFold;
+}
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+ const VNInfo *VNI,
+ MachineBasicBlock *MBB,
+ SlotIndex Idx) const {
+ SlotIndex End = getMBBEndIdx(MBB);
+ for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
+ if (VNI->kills[j].isPHI())
+ continue;
+
+ SlotIndex KillIdx = VNI->kills[j];
+ if (KillIdx > Idx && KillIdx <= End)
+ return true;
+ }
+ return false;
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+ struct RewriteInfo {
+ SlotIndex Index;
+ MachineInstr *MI;
+ bool HasUse;
+ bool HasDef;
+ RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
+ : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ };
+
+ struct RewriteInfoCompare {
+ bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+ LiveInterval::Ranges::const_iterator &I,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ BitVector &SpillMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool AllCanFold = true;
+ unsigned NewVReg = 0;
+ SlotIndex start = I->start.getBaseIndex();
+ SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
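+ // Note: start is rounded down to the instruction's base index and end is
+ // rounded up to the next base index, so the containment test below catches
+ // every instruction whose index falls inside this live range.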
+
+ // First collect all the def / use in this live range that will be rewritten.
+ // Make sure they are sorted according to instruction index.
+ std::vector<RewriteInfo> RewriteMIs;
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineInstr *MI = &*ri;
+ MachineOperand &O = ri.getOperand();
+ ++ri;
+ if (MI->isDebugValue()) {
+ // Remove debug info for now.
+ O.setReg(0U);
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ continue;
+ }
+ assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+ SlotIndex index = getInstructionIndex(MI);
+ if (index < start || index >= end)
+ continue;
+
+ if (O.isUndef())
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG, with both reloads targeting registers that overlap.
+ continue;
+ RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ }
+ std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+ unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
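+ // If the remat def uses another virtual register, remember it; the loop
+ // below must keep that register's interval from being spilled.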
+ // Now rewrite the defs and uses.
+ for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+ RewriteInfo &rwi = RewriteMIs[i];
+ ++i;
+ SlotIndex index = rwi.Index;
+ bool MIHasUse = rwi.HasUse;
+ bool MIHasDef = rwi.HasDef;
+ MachineInstr *MI = rwi.MI;
+ // If MI defs and/or uses the same register multiple times, then there
+ // are multiple entries.
+ unsigned NumUses = MIHasUse;
+ while (i != e && RewriteMIs[i].MI == MI) {
+ assert(RewriteMIs[i].Index == index);
+ bool isUse = RewriteMIs[i].HasUse;
+ if (isUse) ++NumUses;
+ MIHasUse |= isUse;
+ MIHasDef |= RewriteMIs[i].HasDef;
+ ++i;
+ }
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (ImpUse && MI != ReMatDefMI) {
+ // Re-matting an instruction with virtual register use. Update the
+ // register interval's spill weight to HUGE_VALF to prevent it from
+ // being spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ }
+
+ unsigned MBBId = MBB->getNumber();
+ unsigned ThisVReg = 0;
+ if (TrySplit) {
+ DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+ if (NVI != MBBVRegsMap.end()) {
+ ThisVReg = NVI->second;
+ // One common case:
+ // x = use
+ // ...
+ // ...
+ // def = ...
+ // = use
+ // It's better to start a new interval to avoid artificially
+ // extending the new interval.
+ if (MIHasDef && !MIHasUse) {
+ MBBVRegsMap.erase(MBB->getNumber());
+ ThisVReg = 0;
+ }
+ }
+ }
+
+ bool IsNew = ThisVReg == 0;
+ if (IsNew) {
+ // This ends the previous live interval. If all of its def / use
+ // can be folded, give it a low spill weight.
+ if (NewVReg && TrySplit && AllCanFold) {
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+ AllCanFold = true;
+ }
+ NewVReg = ThisVReg;
+
+ bool HasDef = false;
+ bool HasUse = false;
+ bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+ index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+ ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+ if (!HasDef && !HasUse)
+ continue;
+
+ AllCanFold &= CanFold;
+
+ // Update weight of spill interval.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (!TrySplit) {
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.weight = HUGE_VALF;
+ continue;
+ }
+
+ // Keep track of the last def and first use in each MBB.
+ if (HasDef) {
+ if (MI != ReMatOrigDefMI || !CanDelete) {
+ bool HasKill = false;
+ if (!HasUse)
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
+ else {
+ // If this is two-address code, then this index starts a new VNInfo.
+ const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
+ if (VNI)
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
+ }
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (!HasKill) {
+ if (SII == SpillIdxes.end()) {
+ std::vector<SRInfo> S;
+ S.push_back(SRInfo(index, NewVReg, true));
+ SpillIdxes.insert(std::make_pair(MBBId, S));
+ } else if (SII->second.back().vreg != NewVReg) {
+ SII->second.push_back(SRInfo(index, NewVReg, true));
+ } else if (index > SII->second.back().index) {
+ // If there is an earlier def and this is a two-address
+ // instruction, then it's not possible to fold the store (which
+ // would also fold the load).
+ SRInfo &Info = SII->second.back();
+ Info.index = index;
+ Info.canFold = !HasUse;
+ }
+ SpillMBBs.set(MBBId);
+ } else if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index) {
+ // There is an earlier def that's not killed (must be two-address).
+ // The spill is no longer needed.
+ SII->second.pop_back();
+ if (SII->second.empty()) {
+ SpillIdxes.erase(MBBId);
+ SpillMBBs.reset(MBBId);
+ }
+ }
+ }
+ }
+
+ if (HasUse) {
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index)
+ // There are use(s) following the last def, so it's not safe to fold the spill.
+ SII->second.back().canFold = false;
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+ RestoreIdxes.find(MBBId);
+ if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+ // If we are splitting live intervals, only fold if it's the first
+ // use and there isn't another use later in the MBB.
+ RII->second.back().canFold = false;
+ else if (IsNew) {
+ // Only need a reload if there isn't an earlier def / use.
+ if (RII == RestoreIdxes.end()) {
+ std::vector<SRInfo> Infos;
+ Infos.push_back(SRInfo(index, NewVReg, true));
+ RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+ } else {
+ RII->second.push_back(SRInfo(index, NewVReg, true));
+ }
+ RestoreMBBs.set(MBBId);
+ }
+ }
+
+ // Update spill weight.
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ }
+
+ if (NewVReg && TrySplit && AllCanFold) {
+ // If all of its def / use can be folded, give it a low spill weight.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return false;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index &&
+ Restores[i].vreg == vr &&
+ Restores[i].canFold)
+ return true;
+ return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index && Restores[i].vreg == vr)
+ Restores[i].index = SlotIndex();
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ std::vector<LiveInterval*> &NewLIs) {
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineOperand &O = ri.getOperand();
+ MachineInstr *MI = &*ri;
+ ++ri;
+ if (O.isDef()) {
+ assert(MI->isImplicitDef() &&
+ "Register def was not rewritten?");
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ } else {
+ // This must be a use of an implicit_def so it's not part of the live
+ // interval. Create a new empty live interval for it.
+ // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.setIsImplicitlyDefined(NewVReg);
+ NewLIs.push_back(&getOrCreateInterval(NewVReg));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg) {
+ MO.setReg(NewVReg);
+ MO.setIsUndef();
+ }
+ }
+ }
+ }
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpillsFast(const LiveInterval &li,
+ const MachineLoopInfo *loopInfo,
+ VirtRegMap &vrm) {
+ unsigned slot = vrm.assignVirt2StackSlot(li.reg);
+
+ std::vector<LiveInterval*> added;
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DEBUG({
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.dump();
+ dbgs() << '\n';
+ });
+
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
+ while (RI != mri_->reg_end()) {
+ MachineInstr* MI = &*RI;
+
+ SmallVector<unsigned, 2> Indices;
+ bool HasUse = false;
+ bool HasDef = false;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg() || mop.getReg() != li.reg) continue;
+
+ HasUse |= MI->getOperand(i).isUse();
+ HasDef |= MI->getOperand(i).isDef();
+
+ Indices.push_back(i);
+ }
+
+ if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
+ Indices, true, slot, li.reg)) {
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.assignVirt2StackSlot(NewVReg, slot);
+
+ // Create a new register for this spill.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.weight = HUGE_VALF;
+
+ // Rewrite register operands to use the new vreg.
+ for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
+ E = Indices.end(); I != E; ++I) {
+ MI->getOperand(*I).setReg(NewVReg);
+
+ if (MI->getOperand(*I).isUse())
+ MI->getOperand(*I).setIsKill(true);
+ }
+
+ // Fill in the new live interval.
+ SlotIndex index = getInstructionIndex(MI);
+ if (HasUse) {
+ LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
+ nI.getNextValue(SlotIndex(), 0, false,
+ getVNInfoAllocator()));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ vrm.addRestorePoint(NewVReg, MI);
+ }
+ if (HasDef) {
+ LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, false,
+ getVNInfoAllocator()));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ vrm.addSpillPoint(NewVReg, true, MI);
+ }
+
+ added.push_back(&nI);
+
+ DEBUG({
+ dbgs() << "\t\t\t\tadded new interval: ";
+ nI.dump();
+ dbgs() << '\n';
+ });
+ }
+
+
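+ // Any operands of MI that referred to li.reg were folded away or
+ // rewritten to NewVReg above, so restart from the head of li.reg's
+ // remaining use/def list.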
+ RI = mri_->reg_begin(li.reg);
+ }
+
+ return added;
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+
+ if (EnableFastSpilling)
+ return addIntervalsForSpillsFast(li, loopInfo, vrm);
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DEBUG({
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+
+ // Each bit specifies whether a spill is required in the MBB.
+ BitVector SpillMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+ BitVector RestoreMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+ DenseMap<unsigned,unsigned> MBBVRegsMap;
+ std::vector<LiveInterval*> NewLIs;
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ unsigned NumValNums = li.getNumValNums();
+ SmallVector<MachineInstr*, 4> ReMatDefs;
+ ReMatDefs.resize(NumValNums, NULL);
+ SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+ ReMatOrigDefs.resize(NumValNums, NULL);
+ SmallVector<int, 4> ReMatIds;
+ ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+ BitVector ReMatDelete(NumValNums);
+ unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
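+ // MAX_STACK_SLOT serves as a sentinel here until we know whether this
+ // interval actually needs a stack slot.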
+
+ // Spilling a split live interval. It cannot be split any further. It is
+ // also guaranteed to be a single val# / range interval.
+ if (vrm.getPreSplitReg(li.reg)) {
+ vrm.setIsSplitFromReg(li.reg, 0);
+ // Unset the split kill marker on the last use.
+ SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx != SlotIndex()) {
+ MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+ assert(KillMI && "Last use disappeared?");
+ int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+ assert(KillOp != -1 && "Last use disappeared?");
+ KillMI->getOperand(KillOp).setIsKill(false);
+ }
+ vrm.removeKillPoint(li.reg);
+ bool DefIsReMat = vrm.isReMaterialized(li.reg);
+ Slot = vrm.getStackSlot(li.reg);
+ assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+ MachineInstr *ReMatDefMI = DefIsReMat ?
+ vrm.getReMaterializedMI(li.reg) : NULL;
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+ bool IsFirstRange = true;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ // If this is a split live interval with multiple ranges, it means there
+ // are two-address instructions that re-defined the value. Only the
+ // first def can be rematerialized!
+ if (IsFirstRange) {
+ // Note ReMatOrigDefMI has already been deleted.
+ rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ } else {
+ rewriteInstructionsForSpills(li, false, I, NULL, 0,
+ Slot, 0, false, false, false,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+ IsFirstRange = false;
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ bool TrySplit = !intervalIsInOneMBB(li);
+ if (TrySplit)
+ ++numSplits;
+ bool NeedStackSlot = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = VNI->isDefAccurate()
+ ? getInstructionFromIndex(VNI->def) : 0;
+ bool dummy;
+ if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+ // Remember how to remat the def of this val#.
+ ReMatOrigDefs[VN] = ReMatDefMI;
+ // Original def may be modified so we have to make a copy here.
+ MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+ CloneMIs.push_back(Clone);
+ ReMatDefs[VN] = Clone;
+
+ bool CanDelete = true;
+ if (VNI->hasPHIKill()) {
+ // A kill is a phi node, not all of its uses can be rematerialized.
+ // It must not be deleted.
+ CanDelete = false;
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ if (CanDelete)
+ ReMatDelete.set(VN);
+ } else {
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ }
+
+ // One stack slot per live interval.
+ if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+ if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+ Slot = vrm.assignVirt2StackSlot(li.reg);
+
+ // This case only occurs when the prealloc splitter has already assigned
+ // a stack slot to this vreg.
+ else
+ Slot = vrm.getStackSlot(li.reg);
+ }
+
+ // Create new intervals and rewrite defs and uses.
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+ MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+ bool DefIsReMat = ReMatDefMI != NULL;
+ bool CanDelete = ReMatDelete[I->valno->id];
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+ rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+
+ // Insert spills / restores if we are splitting.
+ if (!TrySplit) {
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ SmallPtrSet<LiveInterval*, 4> AddedKill;
+ SmallVector<unsigned, 2> Ops;
+ if (NeedStackSlot) {
+ int Id = SpillMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &spills = SpillIdxes[Id];
+ for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+ SlotIndex index = spills[i].index;
+ unsigned VReg = spills[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ bool FoundUse = false;
+ Ops.clear();
+ if (spills[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ Ops.push_back(j);
+ if (MO.isDef())
+ continue;
+ if (isReMat ||
+ (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
+ // MI has two-address uses of the same register. If the use
+ // isn't the first and only use in the BB, then we can't fold
+ // it. FIXME: Move this to rewriteInstructionsForSpills.
+ CanFold = false;
+ break;
+ }
+ FoundUse = true;
+ }
+ }
+ // Fold the store into the def if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+ Folded = true;
+ if (FoundUse) {
+ // Also folded uses, do not issue a load.
+ eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ }
+ nI.removeRange(index.getDefIndex(), index.getStoreIndex());
+ }
+ }
+
+ // Otherwise tell the spiller to issue a spill.
+ if (!Folded) {
+ LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+ bool isKill = LR->end == index.getStoreIndex();
+ if (!MI->registerDefIsDead(nI.reg))
+ // No need to spill a dead def.
+ vrm.addSpillPoint(VReg, isKill, MI);
+ if (isKill)
+ AddedKill.insert(&nI);
+ }
+ }
+ Id = SpillMBBs.find_next(Id);
+ }
+ }
+
+ int Id = RestoreMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+ SlotIndex index = restores[i].index;
+ if (index == SlotIndex())
+ continue;
+ unsigned VReg = restores[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ Ops.clear();
+ if (restores[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ if (MO.isDef()) {
+ // If this restore were to be folded, it would have been folded
+ // already.
+ CanFold = false;
+ break;
+ }
+ Ops.push_back(j);
+ }
+ }
+
+ // Fold the load into the use if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (!isReMat)
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+ else {
+ MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+ int LdSlot = 0;
+ bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ // If the rematerializable def is a load, also try to fold it.
+ if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+ Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, isLoadSS, LdSlot, VReg);
+ if (!Folded) {
+ unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+ if (ImpUse) {
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI and update the register
+ // interval's spill weight to HUGE_VALF to prevent it from being
+ // spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+ }
+ }
+ }
+ }
+ // If folding is not possible / failed, then tell the spiller to issue a
+ // load / rematerialization for us.
+ if (Folded)
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ else
+ vrm.addRestorePoint(VReg, MI);
+ }
+ Id = RestoreMBBs.find_next(Id);
+ }
+
+ // Finalize intervals: add kills, finalize spill weights, and filter out
+ // dead intervals.
+ std::vector<LiveInterval*> RetNewLIs;
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+ LiveInterval *LI = NewLIs[i];
+ if (!LI->empty()) {
+ LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI);
+ if (!AddedKill.count(LI)) {
+ LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+ SlotIndex LastUseIdx = LR->end.getBaseIndex();
+ MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+ int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+ assert(UseIdx != -1);
+ if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+ LastUse->getOperand(UseIdx).setIsKill();
+ vrm.addKillPoint(LI->reg, LastUseIdx);
+ }
+ }
+ RetNewLIs.push_back(LI);
+ }
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+ if (allocatableRegs_[*AS] && hasInterval(*AS))
+ return true;
+ return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+ // Find the largest super-register that is allocatable.
+ unsigned BestReg = Reg;
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+ unsigned SuperReg = *AS;
+ if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+ BestReg = SuperReg;
+ break;
+ }
+ }
+ return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+ unsigned PhysReg) const {
+ unsigned NumConflicts = 0;
+ const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ SlotIndex Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index))
+ ++NumConflicts;
+ }
+ return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut the physical register's live interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+ unsigned PhysReg, VirtRegMap &vrm) {
+ unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+ for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+ // If there are registers which alias PhysReg but are not sub-registers
+ // of the chosen representative super register, assert, since we can't
+ // handle them yet.
+ assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+ tri_->isSuperRegister(*AS, SpillReg));
+
+ bool Cut = false;
+ SmallVector<unsigned, 4> PRegs;
+ if (hasInterval(SpillReg))
+ PRegs.push_back(SpillReg);
+ else {
+ SmallSet<unsigned, 4> Added;
+ for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
+ if (Added.insert(*AS) && hasInterval(*AS)) {
+ PRegs.push_back(*AS);
+ for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
+ Added.insert(*ASS);
+ }
+ }
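+ // PRegs now holds the representative register itself, or its live
+ // sub-registers, whose intervals must be spilled around each reference.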
+
+ SmallPtrSet<MachineInstr*, 8> SeenMIs;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (SeenMIs.count(MI))
+ continue;
+ SeenMIs.insert(MI);
+ SlotIndex Index = getInstructionIndex(MI);
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
+ unsigned PReg = PRegs[i];
+ LiveInterval &pli = getInterval(PReg);
+ if (!pli.liveAt(Index))
+ continue;
+ vrm.addEmergencySpill(PReg, MI);
+ SlotIndex StartIdx = Index.getLoadIndex();
+ SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
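+ // Remove the live range covering every slot of this instruction: from
+ // its load slot up to (but not including) the next instruction's base.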
+ if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+ pli.removeRange(StartIdx, EndIdx);
+ Cut = true;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, tm_);
+ }
+ llvm_report_error(Msg.str());
+ }
+ for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
+ if (!hasInterval(*AS))
+ continue;
+ LiveInterval &spli = getInterval(*AS);
+ if (spli.liveAt(Index))
+ spli.removeRange(Index.getLoadIndex(),
+ Index.getNextIndex().getBaseIndex());
+ }
+ }
+ }
+ return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ startInst, true, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent()));
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
+
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 0000000..d2f3775
--- /dev/null
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,63 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions after all VNInfo objects are destroyed.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << RC->getName() << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..8a124dc
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,788 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
+ if (RegIdx >= VirtRegInfo.size()) {
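+ // Grow the vector geometrically (at least doubling) so repeated queries
+ // for ever-larger vreg numbers stay amortized constant time.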
+ if (RegIdx >= 2*VirtRegInfo.size())
+ VirtRegInfo.resize(RegIdx*2);
+ else
+ VirtRegInfo.resize(2*VirtRegInfo.size());
+ }
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+ E = MBB->pred_rend(); PI != E; ++PI)
+ WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
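+ // Walk the predecessors iteratively with an explicit worklist to avoid
+ // deep recursion on large CFGs.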
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by that instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PartDefRegs.insert(SubReg);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a live-in register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ }
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PhysRegUse[SubReg] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, the EAX def is dead but the AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ unsigned SSReg = *SSRegs; ++SSRegs)
+ PhysRegUse[SSReg] = LastRefOrPartRef;
+ }
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ Live.insert(*SS);
+ } else {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ // If a register isn't itself defined, but all of the parts that make
+ // it up are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+namespace {
+ struct RegSorter {
+ const TargetRegisterInfo *TRI;
+
+ RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
+ bool operator()(unsigned A, unsigned B) {
+ if (TRI->isSubRegister(A, B))
+ return true;
+ else if (TRI->isSubRegister(B, A))
+ return false;
+ return A < B;
+ }
+ };
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ ReservedRegisters = TRI->getReservedRegs(mf);
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+
+ // Get some space for a respectable number of registers.
+ VirtRegInfo.resize(64);
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse())
+ UseRegs.push_back(MOReg);
+ if (MO.isDef())
+ DefRegs.push_back(MOReg);
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to
+ // mark it as using all of the live-out values in the function.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register!");
+ HandlePhysRegUse(*I, Ret);
+
+ // Add live-out registers as implicit uses.
+ if (!Ret->readsRegister(*I))
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegDef[i] || PhysRegUse[i])
+ HandlePhysRegDef(i, 0, Defs);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+ for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[i].Kills[j] ==
+ MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterDead(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+ else
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterKilled(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If there are, it is due to a bug in the instruction selector
+ // or some other part of the code generator.
+#ifndef NDEBUG
+ for (MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed; // Silence the unused-variable warning in NDEBUG builds.
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this
+/// function. In particular, we want to map the variable information of a
+/// virtual register which is used in a PHI node. We map that to the BB the
+/// vreg is coming from.
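+/// PHI operands come in (value, predecessor MBB) pairs starting at operand 1,
+/// which is why the scan below visits every other operand.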
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
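+ // One or two successors are checked directly; for more, sort the block
+ // list once so each kill's parent can be found by binary search.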
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+
+ // Update info for all live variables
+ for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+ E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ VarInfo &VI = getVarInfo(Reg);
+ if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 0000000..b4ef648
--- /dev/null
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,329 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass which runs after register
+// allocation that turns subreg insert/extract instructions into register
+// copies, as needed. This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
+//
+//===----------------------------------------------------------------------===//
+
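+// A minimal before/after sketch of the lowering (x86 operands and the
+// sub-register index below are illustrative, not normative):
+//   %ECX<def> = EXTRACT_SUBREG %RAX<kill>, 4
+// is rewritten into an ordinary copy from the matching sub-register,
+//   %ECX<def> = MOV32rr %EAX<kill>
+// while identity cases (destination already equal to the sub-register) are
+// erased outright.
+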
+#define DEBUG_TYPE "lowersubregs"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct LowerSubregsInstructionPass : public MachineFunctionPass {
+ private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Subregister lowering instruction pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ bool LowerExtract(MachineInstr *MI);
+ bool LowerInsert(MachineInstr *MI);
+ bool LowerSubregToReg(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI);
+ void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
+ const TargetRegisterInfo *TRI,
+ bool AddIfNotFound = false);
+ };
+
+ char LowerSubregsInstructionPass::ID = 0;
+}
+
+FunctionPass *llvm::createLowerSubregsPass() {
+ return new LowerSubregsInstructionPass();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
+ unsigned DstReg,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the kill flag.
+void
+LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
+ unsigned SrcReg,
+ const TargetRegisterInfo *TRI,
+ bool AddIfNotFound) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference source register!");
+ }
+}
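+
+// Illustration of the backward scan above (x86 operands approximate): once
+// "%ECX = MOV32rr %EAX" has been emitted in front of the pseudo
+//   %ECX<def> = EXTRACT_SUBREG %RAX<kill>, 4
+// the loop starts at the copy and walks toward the block start until
+// addRegisterKilled/addRegisterDead succeeds, so the flag lands on the newly
+// emitted copy instead of being lost when the pseudo is erased.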
+
+bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
+ MI->getOperand(2).isImm() && "Malformed extract_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SuperReg = MI->getOperand(1).getReg();
+ unsigned SubIdx = MI->getOperand(2).getImm();
+ unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
+ "Extract supperg source must be a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Extract destination must be in a physical register");
+ assert(SrcReg && "invalid subregister index for register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (SrcReg == DstReg) {
+ // No need to insert an identity copy instruction.
+ if (MI->getOperand(1).isKill()) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(2); // SubIdx
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ // Insert copy
+ const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
+ const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstReg, TRI);
+ if (MI->getOperand(1).isKill())
+ TransferKillFlag(MI, SuperReg, TRI, true);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI);
+ });
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (DstSubReg == InsReg && InsSIdx == 0) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = ...
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
+ // The first def is defining RAX, not EAX so the top bits were not
+ // zero extended.
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0 = TRI->getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1 = TRI->getPhysicalRegisterRegClass(InsReg);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ if (MI->getOperand(2).isKill())
+ TransferKillFlag(MI, InsReg, TRI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI);
+ });
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid insert_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+#ifndef NDEBUG
+ unsigned SrcReg = MI->getOperand(1).getReg();
+#endif
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+ assert(DstSubReg && "invalid subregister index for register");
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Insert superreg source must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction. If the SrcReg was
+ // <undef>, we need to make sure it is alive by inserting a KILL
+ if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstReg);
+ if (MI->getOperand(2).isUndef())
+ MIB.addReg(InsReg, RegState::Undef);
+ else
+ MIB.addReg(InsReg, RegState::Kill);
+ } else {
+ DEBUG(dbgs() << "subreg: eliminated!\n");
+ MBB->erase(MI);
+ return true;
+ }
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0 = TRI->getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1 = TRI->getPhysicalRegisterRegClass(InsReg);
+ if (MI->getOperand(2).isUndef())
+ // If the source register being inserted is undef, then this becomes a
+ // KILL.
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstSubReg);
+ else {
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
+ }
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg.
+ if (!MI->getOperand(1).isUndef())
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true));
+
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead()) {
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ } else {
+ // Make sure the full DstReg is live after this replacement.
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true));
+ }
+
+ // Make sure the inserted register gets killed
+ if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef())
+ TransferKillFlag(MI, InsReg, TRI);
+ }
+
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI) << "\n";
+ });
+
+ MBB->erase(MI);
+ return true;
+}
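+
+// A lowering sketch for the two-address pseudo (x86 syntax approximate):
+//   %RAX<def> = INSERT_SUBREG %RAX, %EDX<kill>, 4
+// becomes a copy into the destination's sub-register,
+//   %EAX<def> = MOV32rr %EDX<kill>, %RAX<imp-use,kill>, %RAX<imp-def>
+// where the implicit operands added above keep the full-width register
+// alive across the replacement.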
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** LOWERING SUBREG INSTRS **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ MachineInstr *MI = mi;
+ if (MI->isExtractSubreg()) {
+ MadeChange |= LowerExtract(MI);
+ } else if (MI->isInsertSubreg()) {
+ MadeChange |= LowerInsert(MI);
+ } else if (MI->isSubregToReg()) {
+ MadeChange |= LowerSubregToReg(MI);
+ }
+ mi = nmi;
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..655a0bf
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,563 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getSymbol - Return the MCSymbol for this basic block.
+///
+MCSymbol *MachineBasicBlock::getSymbol(MCContext &Ctx) const {
+ SmallString<60> Name;
+ const MachineFunction *MF = getParent();
+ raw_svector_ostream(Name)
+ << MF->getTarget().getMCAsmInfo()->getPrivateGlobalPrefix() << "BB"
+ << MF->getFunctionNumber() << '_' << getNumber();
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
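+
+// For example, on a target whose MCAsmInfo reports ".L" as the private
+// global prefix (typical of ELF), block 3 of function 2 gets the symbol
+// ".LBB2_3" (illustrative).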
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands to the reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ N->RemoveRegOperandsFromUseLists();
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator I = end();
+ while (I != begin() && (--I)->getDesc().isTerminator())
+ ; /* noop */
+ if (I != end() && !I->getDesc().isTerminator()) ++I;
+ return I;
+}
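+
+// Illustrative: for a block ending in ..., CMP32ri, JE_4 <BB#2>, JMP_4 <BB#3>
+// (x86 opcode names approximate), the iterator returned above points at the
+// JE_4, the first instruction of the trailing run of terminators.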
+
+/// isOnlyReachableByFallthrough - Return true if this basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (isLandingPad() || pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ const_pred_iterator PI = pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(this))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ const MachineInstr &LastInst = Pred->back();
+ return !LastInst.getDesc().isBarrier();
+}
+
+void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+
+static inline void OutputReg(raw_ostream &os, unsigned RegNo,
+ const TargetRegisterInfo *TRI = 0) {
+ if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (TRI)
+ os << " %" << TRI->get(RegNo).Name;
+ else
+ os << " %physreg" << RegNo;
+ } else
+ os << " %reg" << RegNo;
+}
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+void MachineBasicBlock::print(raw_ostream &OS) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ OS << " Live Ins:";
+ for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OutputReg(OS, *I, TRI);
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ OS << '\t';
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " BB#" << (*SI)->getNumber();
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ assert(I != livein_end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ // The block has a fallthrough conditional branch.
+ MachineBasicBlock *MBBA = *succ_begin();
+ MachineBasicBlock *MBBB = *llvm::next(succ_begin());
+ if (MBBA == TBB) std::swap(MBBB, MBBA);
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond);
+ }
+ }
+ }
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ std::vector<MachineBasicBlock *>::iterator I =
+ std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(),
+ E = fromMBB->succ_end(); I != E; ++I)
+ addSuccessor(*I);
+
+ while (!fromMBB->succ_empty())
+ fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ std::vector<MachineBasicBlock *>::const_iterator I =
+ std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If Fallthrough is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If Fallthrough isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ return empty() || !back().getDesc().isBarrier() ||
+ back().getDesc().isPredicable();
+ }
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ removeSuccessor(Old);
+ addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ // The values of DestA and DestB frequently come from a call to the
+ // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
+ // values from there.
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // with a conditional branch that falls through to a successor; the code
+ // below fills DestB in with that fall-through block.
+ // 4. If DestA and DestB are set and isCond is true, then the block ends with a
+ // conditional branch followed by an unconditional branch. DestA is the
+ // 'true' destination and DestB is the 'false' destination.
+
+ bool MadeChange = false;
+ bool AddedFallThrough = false;
+
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
+
+ if (isCond) {
+ // If this block ends with a conditional branch that falls through to its
+ // successor, set DestB as the successor.
+ if (DestB == 0 && FallThru != getParent()->end()) {
+ DestB = FallThru;
+ AddedFallThrough = true;
+ }
+ } else {
+ // If this is an unconditional branch with no explicit dest, it must just be
+ // a fallthrough into DestA.
+ if (DestA == 0 && FallThru != getParent()->end()) {
+ DestA = FallThru;
+ AddedFallThrough = true;
+ }
+ }
+
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (MBB == DestA) {
+ DestA = 0;
+ ++SI;
+ } else if (MBB == DestB) {
+ DestB = 0;
+ ++SI;
+ } else if (MBB->isLandingPad() &&
+ MBB != OrigDestA && MBB != OrigDestB) {
+ ++SI;
+ } else {
+ // Otherwise, this is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ MadeChange = true;
+ }
+ }
+
+ if (!AddedFallThrough)
+ assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!");
+ else if (isCond)
+ assert(DestA == 0 && "MachineCFG is missing edges!");
+
+ return MadeChange;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+ DebugLoc DL;
+ MachineBasicBlock::iterator E = end();
+ if (MBBI != E) {
+ // Skip DBG_VALUE instructions; we don't want a DebugLoc from them.
+ MachineBasicBlock::iterator MBBI2 = MBBI;
+ while (MBBI2 != E && MBBI2->isDebugValue())
+ ++MBBI2;
+ if (MBBI2 != E)
+ DL = MBBI2->getDebugLoc();
+ }
+ return DL;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..4088739
--- /dev/null
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,59 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+}
+
+char MachineDominatorTree::ID = 0;
+
+static RegisterPass<MachineDominatorTree>
+E("machinedomtree", "MachineDominator Tree Construction", true);
+
+const PassInfo *const llvm::MachineDominatorsID = &E;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(&ID) {
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ DT->releaseMemory();
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..f141c56
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,769 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Config/config.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct Printer : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ Printer(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ OS << "# " << Banner << ":\n";
+ MF.print(OS);
+ return false;
+ }
+ };
+ char Printer::ID = 0;
+}
+
+/// createMachineFunctionPrinterPass - Create a pass that prints each
+/// MachineFunction to the given stream, preceded by the given banner.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new Printer(OS, Banner);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
+ unsigned FunctionNum)
+ : Fn(F), Target(TM) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
+ MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
+ MachineFrameInfo(*TM.getFrameInfo());
+ ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
+ MachineConstantPool(TM.getTargetData());
+ Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ InstructionRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+ FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+ ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function; if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
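+
+// Sketch: after erasing what was BB#2 from a function numbered 0..4, a caller
+// can invoke RenumberBlocks() to compact the numbering back to 0..3, keeping
+// MBBNumbering dense and in layout order.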
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ MI->~MachineInstr();
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
+ int64_t o, uint64_t s,
+ unsigned base_alignment) {
+ return new (Allocator.Allocate<MachineMemOperand>())
+ MachineMemOperand(v, f, o, s, base_alignment);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator.Allocate<MachineMemOperand>())
+ MachineMemOperand(MMO->getValue(), MMO->getFlags(),
+ int64_t(uint64_t(MMO->getOffset()) +
+ uint64_t(Offset)),
+ Size, MMO->getBaseAlignment());
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getValue(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getOffset(), (*I)->getSize(),
+ (*I)->getBaseAlignment());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getValue(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getOffset(), (*I)->getSize(),
+ (*I)->getBaseAlignment());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+void MachineFunction::dump() const {
+ print(dbgs());
+}
+
+void MachineFunction::print(raw_ostream &OS) const {
+ OS << "# Machine code for function " << Fn->getName() << ":\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ if (TRI)
+ OS << "%" << TRI->getName(I->first);
+ else
+ OS << " %physreg" << I->first;
+
+ if (I->second)
+ OS << " in reg%" << I->second;
+
+ if (llvm::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+ if (RegInfo && !RegInfo->liveout_empty()) {
+ OS << "Function Live Outs: ";
+ for (MachineRegisterInfo::liveout_iterator
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){
+ if (TRI)
+ OS << '%' << TRI->getName(*I);
+ else
+ OS << "%physreg" << *I;
+
+ if (llvm::next(I) != E)
+ OS << " ";
+ }
+ OS << '\n';
+ }
+
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
+ BB->print(OS);
+ }
+
+ OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ if (isSimple () && Node->getBasicBlock() &&
+ !Node->getBasicBlock()->getName().empty())
+ return Node->getBasicBlock()->getNameStr() + ":";
+
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple())
+ OSS << Node->getNumber() << ':';
+ else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr());
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = getRegInfo().createVirtualRegister(RC);
+ getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
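+
+// Typical use (sketch; register and class names illustrative): targets call
+// this while lowering formal arguments, e.g.
+//   unsigned VReg = MF.addLiveIn(X86::EDI, X86::GR32RegisterClass);
+// after which the incoming argument value is referenced through VReg.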
+
+/// getDILocation - Get the DILocation for a given DebugLoc object.
+DILocation MachineFunction::getDILocation(DebugLoc DL) const {
+ unsigned Idx = DL.getIndex();
+ assert(Idx < DebugLocInfo.DebugLocations.size() &&
+ "Invalid index into debug locations!");
+ return DILocation(DebugLocInfo.DebugLocations[Idx]);
+}
+
+
+/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ assert(JumpTableInfo && "No jump tables");
+
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+ const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
+
+ const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
+ MAI.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable, bool isSS) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
+ isSS));
+ return -++NumFixedObjects;
+}
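+
+// Illustrative: the first fixed object created receives index -1, the second
+// -2, and so on; the negative index is what distinguishes fixed slots
+// (incoming arguments, return address) from ordinary frame objects.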
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
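+
+// For example, on a 64-bit target an EK_BlockAddress table holds 8-byte
+// pointers, while an EK_LabelDifference32 table (commonly used for PIC)
+// packs each entry into 4 bytes.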
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// and return its index; no attempt is made to reuse an existing entry.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
+ const TargetData *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
+ if (A->getType() == B->getType()) return false;
+
+ // For now, only support constants with the same size.
+ if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ return false;
+
+ // If a floating-point value and an integer value have the same encoding,
+ // they can share a constant-pool entry.
+ if (ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+ if (ConstantInt *BI = dyn_cast<ConstantInt>(B))
+ return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+ if (ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+ if (ConstantInt *AI = dyn_cast<ConstantInt>(A))
+ return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+ // Two vectors can share an entry if each pair of corresponding
+ // elements could.
+ if (ConstantVector *AV = dyn_cast<ConstantVector>(A))
+ if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+ if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
+ return false;
+ for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
+ if (!CanShareConstantPoolEntry(AV->getOperand(i),
+ BV->getOperand(i), TD))
+ return false;
+ return true;
+ }
+
+ // TODO: Handle other cases.
+
+ return false;
+}
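+
+// Illustrative consequence of the bitcast rule above: float 1.0f and the i32
+// constant 0x3F800000 have the same 4-byte encoding, so either can be
+// materialized from a single shared constant-pool slot.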
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. User must specify the minimum required alignment in
+/// bytes for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
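+
+// Illustrative caller (a sketch; MF, C, and Align are assumed to be in scope):
+//
+//   MachineConstantPool *MCP = MF.getConstantPool();
+//   unsigned Idx = MCP->getConstantPoolIndex(C, Align);
+//   MachineOperand CPOp = MachineOperand::CreateCPI(Idx, /*Offset=*/0);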
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1)
+ return (unsigned)Idx;
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 0000000..8d87e3e
--- /dev/null
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,50 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+using namespace llvm;
+
+// Register this pass with PassInfo directly to avoid having to define
+// a default constructor.
+static PassInfo
+X("Machine Function Analysis", "machine-function-analysis",
+ intptr_t(&MachineFunctionAnalysis::ID), 0,
+ /*CFGOnly=*/false, /*is_analysis=*/true);
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++);
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 0000000..2f8d4c9e
--- /dev/null
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,50 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
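+
+// Minimal subclass sketch (hypothetical pass, names assumed), following the
+// same pattern this file supports:
+//
+//   namespace {
+//     struct CountInstrs : public MachineFunctionPass {
+//       static char ID;
+//       CountInstrs() : MachineFunctionPass(&ID) {}
+//       virtual bool runOnMachineFunction(MachineFunction &MF) {
+//         unsigned N = 0;
+//         for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+//              BB != E; ++BB)
+//           N += BB->size();
+//         return false; // analysis only; the machine function is unchanged
+//       }
+//     };
+//     char CountInstrs::ID = 0;
+//   }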
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..df61c74
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1326 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Type.h"
+#include "llvm/Value.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Metadata.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo. If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+ assert(isReg() && "Can only add reg operand to use lists");
+
+ // If the reginfo pointer is null, just explicitly null out our next/prev
+ // pointers, to ensure they are not garbage.
+ if (RegInfo == 0) {
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+ return;
+ }
+
+ // Otherwise, add this operand to the head of the registers use/def list.
+ MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+ // For SSA values, we prefer to keep the definition at the start of the list.
+ // We do this by skipping over the definition if it is at the head of the
+ // list.
+ if (*Head && (*Head)->isDef())
+ Head = &(*Head)->Contents.Reg.Next;
+
+ Contents.Reg.Next = *Head;
+ if (Contents.Reg.Next) {
+ assert(getReg() == Contents.Reg.Next->getReg() &&
+ "Different regs on the same list!");
+ Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+ }
+
+ Contents.Reg.Prev = Head;
+ *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+ assert(isOnRegUseList() && "Reg operand is not on a use list");
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *NextOp = Contents.Reg.Next;
+ *Contents.Reg.Prev = NextOp;
+ if (NextOp) {
+ assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+ NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+ }
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ RemoveRegOperandFromRegInfo();
+ Contents.Reg.RegNo = Reg;
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ Contents.Reg.RegNo = Reg;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && getParent() && getParent()->getParent() &&
+ getParent()->getParent()->getParent())
+ RemoveRegOperandFromRegInfo();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ // If this operand is already a register operand, use setReg to update the
+ // register's use/def lists.
+ if (isReg()) {
+ assert(!isEarlyClobber());
+ setReg(Reg);
+ } else {
+ // Otherwise, change this to a register and set the reg#.
+ OpKind = MO_Register;
+ Contents.Reg.RegNo = Reg;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ }
+
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ default: llvm_unreachable("Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress();
+ }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
+ OS << "%reg" << getReg();
+ } else {
+ if (TM)
+ OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
+ else
+ OS << "%physreg" << getReg();
+ }
+
+ if (getSubReg() != 0)
+ OS << ':' << getSubReg();
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isEarlyClobber()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill() || isDead() || isUndef()) {
+ if (NeedComma) OS << ',';
+ if (isKill()) OS << "kill";
+ if (isDead()) OS << "dead";
+ if (isUndef()) {
+ if (isKill() || isDead())
+ OS << ',';
+ OS << "undef";
+ }
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy())
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ else
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ default:
+ llvm_unreachable("Unrecognized operand type");
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
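+
+// For reference, forms produced by the code above: a def of the first virtual
+// register prints as "%reg1024<def>", a frame index as "<fi#1>", and a
+// constant-pool reference as "<cp#0>".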
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
+ int64_t o, uint64_t s, unsigned int a)
+ : Offset(o), Size(s), V(v),
+ Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Offset);
+ ID.AddInteger(Size);
+ ID.AddPointer(V);
+ ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ V = MMO->getValue();
+ Offset = MMO->getOffset();
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
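+
+// Worked example: with a base alignment of 8 bytes and an offset of 4,
+// MinAlign(8, 4) == 4; the access itself is only known to be 4-byte aligned
+// even though its base is 8-byte aligned.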
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ return OS;
+}
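+
+// For reference, derived from the printing code above: a volatile 4-byte load
+// from a value named %p at offset 8 renders roughly as "Volatile LD4[%p+8]".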
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+ : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified
+/// by the TargetInstrDesc (instructions with a variable number of operands
+/// grow the list as operands are added).
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+ bool NoImp)
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(dl) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+ const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
+ Parent(0), debugLoc(dl) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ Parent(0), debugLoc(MI.getDebugLoc()) {
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MI.getOperand(i));
+ NumImplicitOps = MI.NumImplicitOps;
+
+ // Set parent to null.
+ Parent = 0;
+
+ LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+ assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+ "Reg operand def/use list corrupted");
+ }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(&RegInfo);
+ }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ assert((isImpReg || !OperandsComplete()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *RegInfo = getRegInfo();
+
+ // If we are adding the operand to the end of the list, our job is simpler.
+ // This is true most of the time, so this is a reasonable optimization.
+ if (isImpReg || NumImplicitOps == 0) {
+ // We can only do this optimization if we know that the operand list won't
+ // reallocate.
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+ Operands.push_back(Op);
+
+ // Set the parent of the operand.
+ Operands.back().ParentMI = this;
+
+ // If the operand is a register, update the operand's use list.
+ if (Op.isReg()) {
+ Operands.back().AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as earlyclobber, mark the operand as such
+ unsigned OpNo = Operands.size() - 1;
+ if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, we have to insert a real operand before any implicit ones.
+ unsigned OpNo = Operands.size()-NumImplicitOps;
+
+ // If this instruction isn't embedded into a function, then we don't need to
+ // update any operand lists.
+ if (RegInfo == 0) {
+ // Simple insertion, no reginfo update needed for other register operands.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Do explicitly set the reginfo for this operand though, to ensure the
+ // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(0);
+ // If the register operand is flagged as earlyclobber, mark the operand as such
+ if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ } else if (Operands.size()+1 <= Operands.capacity()) {
+ // Otherwise, we have to remove register operands from their register use
+ // list, add the operand, then add the register operands back to their use
+ // list. This also must handle the case when the operand list reallocates
+ // to somewhere else.
+
+ // If insertion of this operand won't cause reallocation of the operand
+ // list, just remove the implicit operands, add the operand, then re-add all
+ // the rest of the operands.
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+
+ // Add the operand. If it is a register, add it to the reg list.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as earlyclobber, mark the operand as such
+ if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ // Re-add all the implicit ops.
+ for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ } else {
+ // Otherwise, we will be reallocating the operand list. Remove all reg
+ // operands from their list, then readd them after the operand list is
+ // reallocated.
+ RemoveRegOperandsFromUseLists();
+
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Re-add all the operands.
+ AddRegOperandsToUseLists(*RegInfo);
+
+ // If the register operand is flagged as earlyclobber, mark the operand as such
+ if (Operands[OpNo].isReg()
+ && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < Operands.size() && "Invalid operand number");
+
+ // Special case removing the last one.
+ if (OpNo == Operands.size()-1) {
+ // If needed, remove from the reg def/use list.
+ if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+ Operands.back().RemoveRegOperandFromRegInfo();
+
+ Operands.pop_back();
+ return;
+ }
+
+ // Otherwise, we are removing an interior operand. If we have reginfo to
+ // update, remove all operands that will be shifted down from their reg lists,
+ // move everything down, then re-add them.
+ MachineRegisterInfo *RegInfo = getRegInfo();
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+ }
+
+ Operands.erase(Operands.begin()+OpNo);
+
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ mmo_iterator OldMemRefsEnd = MemRefsEnd;
+
+ size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+ mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
+
+ std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // Broken: we have all the operands of this instruction!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. It further tightens
+/// the search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
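+
+// Typical caller sketch (variable names assumed): find the operand that kills
+// Reg and clear its kill marker.
+//
+//   int OpIdx = MI->findRegisterUseOperandIdx(Reg, /*isKill=*/true, TRI);
+//   if (OpIdx != -1)
+//     MI->getOperand(OpIdx).setIsKill(false);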
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isDead || MO.isDead())
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const TargetInstrDesc &TID = getDesc();
+ if (TID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (TID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (isInlineAsm()) {
+ assert(DefOpIdx >= 2);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand index that corresponds to this index.
+ unsigned DefNo = 0;
+ unsigned DefPart = 0;
+ for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+ const MachineOperand &FMO = getOperand(i);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!FMO.isImm())
+ return false;
+ // Skip over this def.
+ unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+ unsigned PrevDef = i + 1;
+ i = PrevDef + NumOps;
+ if (i > DefOpIdx) {
+ DefPart = DefOpIdx - PrevDef;
+ break;
+ }
+ ++DefNo;
+ }
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1 + DefPart;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+ const TargetInstrDesc &TID = getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+ if (isInlineAsm()) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+
+ // Find the flag operand corresponding to UseOpIdx
+ unsigned FlagIdx, NumOps=0;
+ for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!UFMO.isImm())
+ return false;
+ NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
+ assert(NumOps < getNumOperands() && "Invalid inline asm flag");
+ if (UseOpIdx < FlagIdx+NumOps+1)
+ break;
+ }
+ if (FlagIdx >= UseOpIdx)
+ return false;
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = 1;
+ // Remember to adjust the index. First operand is asm string, then there
+ // is a flag for each.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
+ return true;
+ }
+ return false;
+ }
+
+ const TargetInstrDesc &TID = getDesc();
+ if (UseOpIdx >= TID.getNumOperands())
+ return false;
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+ if (DefIdx == -1)
+ return false;
+ if (DefOpIdx)
+ *DefOpIdx = (unsigned)DefIdx;
+ return true;
+}
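+
+// Sketch of a caller such as two-address elimination (names assumed): map a
+// tied use back to its def.
+//
+//   unsigned DefIdx;
+//   if (MI->isRegTiedToDefOperand(UseIdx, &DefIdx)) {
+//     // getOperand(DefIdx) must end up in the same register as the use.
+//   }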
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ // Predicated operands must be last operands.
+ addOperand(MI->getOperand(i));
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ bool &SawStore,
+ AliasAnalysis *AA) const {
+ // Ignore stuff that we obviously can't move.
+ if (TID->mayStore() || TID->isCall()) {
+ SawStore = true;
+ return false;
+ }
+ if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended
+ // destination. The check for isInvariantLoad gives the target the chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (TID->mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, or if the load is volatile, we can't move it.
+ return !SawStore && !hasVolatileMemoryRef();
+
+ return true;
+}
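+
+// Intended calling pattern (a sketch; MBB, TII, and AA assumed): SawStore
+// threads through a forward scan, so any store seen earlier pins the loads
+// that follow it.
+//
+//   bool SawStore = false;
+//   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+//        I != E; ++I)
+//     if (I->isSafeToMove(TII, SawStore, AA)) {
+//       // candidate for motion
+//     }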
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ unsigned DstReg,
+ AliasAnalysis *AA) const {
+ bool SawStore = false;
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, SawStore, AA))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!TID->mayStore() &&
+ !TID->mayLoad() &&
+ !TID->isCall() &&
+ !TID->hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for volatile references.
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if ((*I)->isVolatile())
+ return true;
+
+ return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!TID->mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(V))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
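+
+// Worked example (register numbers illustrative): for
+//   %reg1030 = PHI %reg1024, <BB#0>, %reg1024, <BB#1>
+// every incoming value is %reg1024, so this returns 1024; a PHI mixing
+// %reg1024 and %reg1025 returns 0.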
+
+void MachineInstr::dump() const {
+ dbgs() << " " << *this;
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ }
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ OS << getDesc().getName();
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const TargetOperandInfo &TOI = getDesc().OpInfo[i];
+ if (TOI.isPredicate())
+ OS << "pred:";
+ if (TOI.isOptionalDef())
+ OS << "opt:";
+ }
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ if (!memoperands_empty()) {
+ if (!HaveSemi) { OS << ";"; HaveSemi = true; }
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (next(i) != e)
+ OS << " ";
+ }
+ }
+
+ if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";";
+
+ // TODO: print InlinedAtLoc information
+
+ DILocation DLT = MF->getDILocation(debugLoc);
+ DIScope Scope = DLT.getScope();
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (!Scope.isNull())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << DLT.getLineNumber();
+ if (DLT.getColumnNumber() != 0)
+ OS << ':' << DLT.getColumnNumber();
+ }
+
+ OS << "\n";
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
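+
+// Caller sketch (names assumed): a pass that has just determined that Reg
+// dies at this instruction records the kill, appending an implicit use if no
+// explicit one exists.
+//
+//   MI->addRegisterKilled(Reg, TRI, /*AddIfNotFound=*/true);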
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isDead())
+ // The register is already marked dead.
+ return true;
+ MO.setIsDead();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->getSubRegisters(IncomingReg) &&
+ RegInfo->getSuperRegisters(Reg) &&
+ RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (!MO || MO->getSubReg())
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 0000000..92c84f3
--- /dev/null
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,543 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
+STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ MachineConstantPool *MCP;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ BitVector AllocatableSet;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *LI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+ MachineRegisterInfo *RegInfo; // Machine register information
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Machine Instruction LICM"; }
+
+ // FIXME: Loop preheaders?
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ CSEMap.clear();
+ }
+
+ private:
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(MachineDomTreeNode *N);
+
+ /// isLoadFromConstantMemory - Return true if the given instruction is a
+ /// load from constant memory.
+ bool isLoadFromConstantMemory(MachineInstr *MI);
+
+ /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
+ /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+ /// the preheader that computes the same value. If one is found, replace all
+ /// uses of this instruction's definition with the existing instruction's
+ /// definition rather than hoisting the instruction to the preheader.
+ bool EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+ /// Hoist - When an instruction is found to use only loop invariant operands
+ /// and is safe to hoist, this function is called to do the dirty work.
+ ///
+ void Hoist(MachineInstr *MI);
+
+ /// InitCSEMap - Initialize the CSE map with instructions that are in the
+ /// current loop preheader that may become duplicates of instructions that
+ /// are hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+static RegisterPass<MachineLICM>
+X("machinelicm", "Machine Loop Invariant Code Motion");
+
+FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); }
+
+/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
+/// loop that has a preheader.
+static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPreheader())
+ return false;
+ return true;
+}
+
+/// Hoist expressions out of the specified loop. Note that alias info for
+/// inner loops is not preserved, so it is not a good idea to run LICM
+/// multiple times on one loop.
+///
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine LICM ********\n");
+
+ Changed = FirstInLoop = false;
+ MCP = MF.getConstantPool();
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ RegInfo = &MF.getRegInfo();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ // Get our Loop information...
+ LI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
+ CurLoop = *I;
+
+ // Only visit outer-most preheader-sporting loops.
+ if (!LoopIsOuterMostWithPreheader(CurLoop))
+ continue;
+
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop preheader, we can't do any hoisting.
+ //
+ // FIXME: We are only hoisting if the basic block coming into this loop
+ // has only one successor. This isn't the case in general because we haven't
+ // broken critical edges or added preheaders.
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader)
+ continue;
+
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ FirstInLoop = true;
+ HoistRegion(DT->getNode(CurLoop->getHeader()));
+ CSEMap.clear();
+ }
+
+ return Changed;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
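+///
+/// For illustration, suppose the loop body contains (with made-up vregs and
+/// pseudo-opcodes):
+///
+///   %v1 = MOV_IMM 42        ; loop invariant
+///   %v2 = ADD %v1, %v1      ; invariant once %v1 has been hoisted
+///
+/// Because definitions are visited before uses, %v1 is hoisted first, so
+/// %v2 is already hoistable by the time it is visited.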
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ Hoist(&*MII);
+ MII = NextMII;
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ const TargetInstrDesc &TID = I.getDesc();
+
+ // Ignore stuff that we obviously can't hoist.
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+
+ if (TID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, the
+ // load is invariant and may be hoisted.
+ if (!I.isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+
+ DEBUG({
+ dbgs() << "--- Checking if we can hoist " << I;
+ if (I.getDesc().getImplicitUses()) {
+ dbgs() << " * Instruction has implicit uses:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
+ *ImpUses; ++ImpUses)
+ dbgs() << " -> " << TRI->getName(*ImpUses) << "\n";
+ }
+
+ if (I.getDesc().getImplicitDefs()) {
+ dbgs() << " * Instruction has implicit defines:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n";
+ }
+ });
+
+ if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
+ DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n");
+ return false;
+ }
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!RegInfo->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!RegInfo->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(RegInfo->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+
+/// HasPHIUses - Return true if the specified register has any PHI use.
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
+ for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
+ UE = RegInfo->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isPHI())
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromConstantMemory - Return true if the given instruction is a
+/// load from constant memory. Machine LICM will hoist these even if they are
+/// not re-materializable.
+bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
+ if (!MI->getDesc().mayLoad()) return false;
+ if (!MI->hasOneMemOperand()) return false;
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ if (MMO->isVolatile()) return false;
+ if (!MMO->getValue()) return false;
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
+ if (PSV) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ return PSV->isConstant(MF.getFrameInfo());
+ } else {
+ return AA->pointsToConstantMemory(MMO->getValue());
+ }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return false;
+
+ // FIXME: For now, only hoist re-materializable instructions. LICM will
+ // increase register pressure. We want to make sure it doesn't increase
+ // spilling.
+ // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
+ // Hoisting these tends to help performance in low register pressure
+ // situations; the trade-off is that it may cause spills under high register
+ // pressure, since a store ends up in the loop preheader. But the reload is
+ // no more expensive, and as a side benefit these loads are frequently
+ // CSE'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA)) {
+ if (!isLoadFromConstantMemory(&MI))
+ return false;
+ }
+
+ // If result(s) of this instruction is used by PHIs, then don't hoist it.
+ // The presence of joins makes it difficult for current register allocator
+ // implementation to perform remat.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (HasPHIUses(MO.getReg(), RegInfo))
+ return false;
+ }
+
+ return true;
+}
+
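+/// ExtractHoistableLoad - Unfold a load from the given instruction so the
+/// load itself can be hoisted. As an illustrative (x86-flavored) sketch of
+/// what unfolding does, an instruction with a folded constant-memory load
+/// such as
+///
+///   %v2 = ADD32rm %v1, <constant-pool entry>
+///
+/// becomes
+///
+///   %v3 = MOV32rm <constant-pool entry>   ; hoistable load
+///   %v2 = ADD32rr %v1, %v3
+///
+/// and the new load is then a candidate for hoisting to the preheader.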
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // The instruction itself wasn't hoistable, but we may be able to unfold a
+ // load from it and hoist that instead. First test whether the instruction
+ // is loading from an amenable memory location.
+ if (!isLoadFromConstantMemory(MI))
+ return 0;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const TargetInstrDesc &TID = TII->get(NewOpc);
+ if (TID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = RegInfo->createVirtualRegister(RC);
+
+ MachineFunction &MF = *MI->getParent()->getParent();
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MI, NewMIs[0]);
+ MBB->insert(MI, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ // FIXME: For now, only hoist re-materializable instructions. LICM will
+ // increase register pressure. We want to make sure it doesn't increase
+ // spilling.
+ if (TII->isTriviallyReMaterializable(MI, AA)) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+ }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ if (TII->isIdentical(MI, PrevMI, RegInfo))
+ return PrevMI;
+ }
+ return 0;
+}
+
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ if (CI == CSEMap.end())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ }
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+void MachineLICM::Hoist(MachineInstr *MI) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return;
+ }
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (CurPreheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << CurPreheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(CurPreheader);
+ FirstInLoop = false;
+ }
+
+ // Look for opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+}
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 0000000..269538b
--- /dev/null
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,80 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace llvm {
+#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLB);
+#undef MLB
+#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLIB);
+#undef MLIB
+}
+
+char MachineLoopInfo::ID = 0;
+static RegisterPass<MachineLoopInfo>
+X("machine-loops", "Machine Natural Loop Construction", true);
+
+const PassInfo *const llvm::MachineLoopInfoID = &X;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
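+/// getTopBlock - Return the topmost block of this loop in function layout
+/// order, scanning upward from the header through contiguous loop blocks.
+/// For example, with layout [entry, A, B, C, exit] and a loop {A, B, C}
+/// whose header is B, the scan walks up from B and returns A.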
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB != Begin) {
+ MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB == Begin) break;
+ PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ }
+ }
+ return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB != prior(End)) {
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ }
+ }
+ return BotMBB;
+}
+
+void MachineLoop::dump() const {
+ print(dbgs());
+}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..5052af7
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,297 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+static RegisterPass<MachineModuleInfo>
+X("machinemoduleinfo", "Module Information");
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(&ID)
+, ObjFileMMI(0)
+, CurCallSite(0)
+, CallsEHReturn(0)
+, CallsUnwindInit(0)
+, DbgInfoAvailable(false) {
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+ delete ObjFileMMI;
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(Module &M) {
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ unsigned BeginLabel, unsigned EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ unsigned LandingPadLabel = NextLabelID();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding, go ahead and add it at
+ // the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads() {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+
+ // Special case: we *should* emit LPs with a null LP MBB, since that
+ // indicates the "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
+ unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
+ unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
+
+ if (!BeginLabel || !EndLabel) {
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ continue;
+ }
+
+ LandingPad.BeginLabels[j] = BeginLabel;
+ LandingPad.EndLabels[j] = EndLabel;
+ ++j;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
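+/// Type ids are 1-based; id 0 is reserved to mean "cleanup" in a landing
+/// pad's TypeIds list (see addCleanup). For example, with TypeInfos ==
+/// [&T1, &T2], getTypeIDFor(&T2) returns 2.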
+unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
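+/// Filter ids are negative: id -(1+i) names the zero-terminated run of type
+/// ids starting at FilterIds[i]. For example, adding filter [3,2] to an
+/// empty table yields FilterIds == [3,2,0] and id -1; a later filter [2]
+/// matches the existing tail at index 1 and is assigned id -2 without
+/// growing the table.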
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return unique index for current personality
+/// function. NULL/first personality function should always get zero index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality - use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is at index zero,
+ // i.e. the NULL personality.
+ return 0;
+}
+
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 0000000..8378906
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ const MCSymbol *LHSS =
+ ((const std::pair<MCSymbol*, MCSymbol*>*)LHS)->first;
+ const MCSymbol *RHSS =
+ ((const std::pair<MCSymbol*, MCSymbol*>*)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoMachO::SymbolListTy
+MachineModuleInfoMachO::GetSortedStubs(const DenseMap<MCSymbol*,
+ MCSymbol*> &Map) {
+ MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end());
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..9f4ef12
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
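+//
+// Clients normally register a pass by instantiating a registry node class
+// (e.g. RegisterRegAlloc or RegisterScheduler) rather than calling Add and
+// Remove directly, roughly:
+//
+//   static RegisterRegAlloc
+//     linearscanRegAlloc("linearscan", "linear scan register allocator",
+//                        createLinearScanRegisterAllocator);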
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..b31973e
--- /dev/null
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,125 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+ UsedPhysRegs.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
+ assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ assert(!PhysRegUseDefLists[i] &&
+ "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ unsigned VR = Reg;
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ assert(Reg < VRegInfo.size() && "Invalid vreg!");
+ const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
+ VRegInfo[Reg].first = RC;
+
+ // Remove from old register class's vregs list. This may be slow but
+ // fortunately this operation is rarely needed.
+ std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
+ std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+ VRegs.erase(I);
+
+ // Add to new register class's vregs list.
+ RegClass2VRegMap[RC->getID()].push_back(VR);
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
+ VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+ RegAllocHints.push_back(std::make_pair(0, 0));
+
+ if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+ // The vector reallocated, handle this now.
+ HandleVRegListReallocation();
+ unsigned VR = getLastVirtReg();
+ RegClass2VRegMap[RegClass->getID()].push_back(VR);
+ return VR;
+}
+
+/// HandleVRegListReallocation - We just added a virtual register to the
+/// VRegInfo info list and it reallocated. Update the use/def lists info
+/// pointers.
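+/// (Each vreg's use/def chain is threaded through its MachineOperands, and
+/// the first operand's back-pointer refers into the VRegInfo vector itself,
+/// so reallocation leaves those back-pointers dangling until fixed here.)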
+void MachineRegisterInfo::HandleVRegListReallocation() {
+ // The back pointers for the vreg lists point into the previous vector.
+ // Update them to point to their correct slots.
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
+ MachineOperand *List = VRegInfo[i].second;
+ if (!List) continue;
+ // Update the back-pointer to be accurate once more.
+ List->Contents.Reg.Prev = &VRegInfo[i].second;
+ }
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
+ "Invalid vreg!");
+ // Since we are in SSA form, we can use the first definition.
+ if (!def_empty(Reg))
+ return &*def_begin(Reg);
+ return 0;
+}
+
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 0000000..2255dc3
--- /dev/null
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,394 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on the
+// SSAUpdater class in lib/Transforms/Utils.
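+//
+// Typical usage (names illustrative): the client creates new definitions of
+// a virtual register in several blocks and then rewrites stale uses:
+//
+//   MachineSSAUpdater SSA(MF);
+//   SSA.Initialize(OrigVReg);              // the vreg being rewritten
+//   SSA.AddAvailableValue(BB1, NewVReg1);  // client-created definitions
+//   SSA.AddAvailableValue(BB2, NewVReg2);
+//   SSA.RewriteUse(UseOfOrigVReg);         // inserts PHIs as needed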
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+typedef std::vector<std::pair<MachineBasicBlock*, unsigned> >
+ IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+ return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), IPI(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete &getAvailableVals(AV);
+ delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ if (IPI == 0)
+ IPI = new IncomingPredInfoTy();
+ else
+ getIncomingPredInfo(IPI).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->front();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which
+/// defines a value of the given register class at the start of the specified
+/// basic block. It returns the virtual register defined by the instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!getAvailableVals(AV).count(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // If all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+ return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+ // Query AvailableVals by doing an insertion of null.
+ std::pair<AvailableValsTy::iterator, bool> InsertRes =
+ AvailableVals.insert(std::make_pair(BB, 0));
+
+ // Handle the case when the insertion fails because we have already seen BB.
+ if (!InsertRes.second) {
+ // If the insertion failed, there are two cases. The first case is that the
+ // value is already available for the specified block. If we get this, just
+ // return the value.
+ if (InsertRes.first->second != 0)
+ return InsertRes.first->second;
+
+ // Otherwise, the value we found is null, meaning the value is not known
+ // yet but is being computed elsewhere in our recursion. This means that
+ // we have a cycle. Handle this by inserting a PHI node and returning it;
+ // when we get back to the first instance of the recursion we will fill in
+ // the PHI node.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ VRC, MRI,TII);
+ unsigned NewVR = NewPHI->getOperand(0).getReg();
+ InsertRes.first->second = NewVR;
+ return NewVR;
+ }
+
+ // If there are no predecessors, then we must have found an unreachable
+ // block; just return 'undef'. Since there are no predecessors, InsertRes
+ // must not be invalidated.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return InsertRes.first->second = NewDef->getOperand(0).getReg();
+ }
+
+ // Okay, the value isn't in the map and we just inserted a null in the entry
+ // to indicate that we're processing the block. Since we have no idea what
+ // value is in this block, we have to recurse through our predecessors.
+ //
+ // While we're walking our predecessors, we keep track of them in a vector,
+ // then insert a PHI node at the end if we actually need one. We could use
+ // a SmallVector here, but that would take a lot of stack space for every
+ // level of the recursion, so instead we use IncomingPredInfo as an
+ // explicit stack.
+ IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+ unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+
+ // As we're walking the predecessors, keep track of whether they are all
+ // producing the same value. If so, this value will capture it, if not, it
+ // will get reset to null. We distinguish the no-predecessor case explicitly
+ // below.
+ unsigned SingularValue = 0;
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
+ // this block is involved in a loop, a no-entry PHI node will have been
+ // inserted as InsertedVal. Otherwise, we'll still have the null we inserted
+ // above.
+ unsigned &InsertedVal = AvailableVals[BB];
+
+ // If all the predecessor values are the same then we don't need to insert a
+ // PHI. This is the simple and common case.
+ if (SingularValue) {
+ // If a PHI node got inserted, replace it with the singular value and
+ // delete it.
+ if (InsertedVal) {
+ MachineInstr *OldVal = MRI->getVRegDef(InsertedVal);
+ // Be careful about dead loops. These RAUW's also update InsertedVal.
+ assert(InsertedVal != SingularValue && "Dead loop?");
+ ReplaceRegWith(InsertedVal, SingularValue);
+ OldVal->eraseFromParent();
+ }
+
+ InsertedVal = SingularValue;
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+ return InsertedVal;
+ }
+
+
+ // Otherwise, we do need a PHI: insert one now if we don't already have one.
+ MachineInstr *InsertedPHI;
+ if (InsertedVal == 0) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ VRC, MRI, TII);
+ InsertedVal = InsertedPHI->getOperand(0).getReg();
+ } else {
+ InsertedPHI = MRI->getVRegDef(InsertedVal);
+ }
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (IncomingPredInfoTy::iterator I =
+ IncomingPredInfo.begin()+FirstPredInfoEntry,
+ E = IncomingPredInfo.end(); I != E; ++I)
+ MIB.addReg(I->second).addMBB(I->first);
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ MRI->replaceRegWith(InsertedVal, ConstVal);
+ InsertedPHI->eraseFromParent();
+ InsertedVal = ConstVal;
+ } else {
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+ }
+
+ return InsertedVal;
+}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 0000000..c391576
--- /dev/null
+++ b/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,279 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *RegInfo; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ AliasAnalysis *AA;
+ BitVector AllocatableSet; // Which physregs are allocatable?
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+static RegisterPass<MachineSinking>
+X("machine-sink", "Machine code sinking");
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block.
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+ for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg),
+ E = RegInfo->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ }
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RegInfo = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, SawStore, AA))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ MachineBasicBlock *ParentBlock = MI->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!RegInfo->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!RegInfo->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ return false;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ return false;
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+ E = ParentBlock->succ_end(); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Reg, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+ }
+ }
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It's not safe to sink instructions to an EH landing pad. Control flow
+ // into a landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops, where a block is its own successor.
+ if (MI->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(dbgs() << "Sink instr " << *MI);
+ DEBUG(dbgs() << "to block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this would introduce computation
+ // on a path where it doesn't already exist. We could split the critical
+ // edge, but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->pred_size() > 1) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+ // Move the instruction. Note that splice takes a half-open range, so
+ // passing MI and the iterator just past MI moves exactly one instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+ return true;
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..434a1e8
--- /dev/null
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,881 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
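+//
+// For example (illustrative; foo.bc and verify.log are placeholders):
+//   llc -verify-machineinstrs foo.bc
+//   LLVM_VERIFY_MACHINEINSTRS=verify.log llc foo.bc
+//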
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, bool allowDoubleDefs) :
+ PASS(pass),
+ allowVirtDoubleDefs(allowDoubleDefs),
+ allowPhysDoubleDefs(allowDoubleDefs),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const bool allowVirtDoubleDefs;
+ const bool allowPhysDoubleDefs;
+
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegSet regsLiveInButUnused;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Vregs that must be dead in because they are defined without being
+ // killed first. Map value is the defining instruction.
+ RegMap vregsDeadIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ bool AllowDoubleDefs;
+
+ explicit MachineVerifierPass(bool allowDoubleDefs = false)
+ : MachineFunctionPass(&ID),
+ AllowDoubleDefs(allowDoubleDefs) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, AllowDoubleDefs);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+static RegisterPass<MachineVerifierPass>
+MachineVer("machineverifier", "Verify generated machine code");
+static const PassInfo *const MachineVerifyID = &MachineVer;
+
+FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
+ return new MachineVerifierPass(allowPhysDoubleDefs);
+}
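+
+// Illustrative use (a sketch; "PM" is a hypothetical pass manager): the
+// verifier can be scheduled like any other machine function pass, e.g.
+//   PM.add(createMachineVerifierPass());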
+
+void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const {
+ MachineVerifier(p, allowDoubleDefs)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ if (PASS) {
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ } else {
+ LiveVars = NULL;
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ llvm_report_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++)
+ MF->print(*OS);
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getName()
+ << " " << (void*)MBB
+ << " (BB#" << MBB->getNumber() << ")\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ regsReserved = TRI->getReservedRegs(*MF);
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+ regsReserved.set(*Sub);
+ }
+ }
+ markReachable(&MF->front());
+}
+
+// Does the iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+ // check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
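+ // AnalyzeBranch's (TBB, FBB, Cond) results encode four cases, each checked
+ // against the CFG below:
+ //   no TBB, no FBB, Cond empty : block falls through to its layout successor
+ //   TBB, no FBB, Cond empty    : unconditional branch to TBB
+ //   TBB, no FBB, Cond non-empty: conditional branch to TBB, else fall through
+ //   TBB and FBB                : conditional branch to TBB, else branch to FBB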
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_empty()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (MBB->succ_begin()[0] != MBBI) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (MBB->succ_begin()[0] != TBB) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ regsLive.insert(*R);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const TargetInstrDesc &TI = MI->getDesc();
+ if (MI->getNumOperands() < TI.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << TI.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !TI.mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !TI.mayStore())
+ report("Missing mayStore flag", MI);
+ }
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const TargetInstrDesc &TI = MI->getDesc();
+
+ // The first TI.NumDefs operands must be explicit register defines
+ if (MONum < TI.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < TI.getNumOperands()) {
+ if (MO->isReg()) {
+ if (MO->isDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MO->isUndef()) {
+ // An <undef> doesn't refer to any register, so just skip it.
+ } else if (MO->isUse()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ if (MO->isKill()) {
+ isKill = true;
+ // Tied operands on two-address instructions MUST NOT have a <kill> flag.
+ if (MI->isRegTiedToDefOperand(MONum))
+ report("Illegal kill flag on two-address instruction operand",
+ MO, MONum);
+ } else {
+ // TwoAddress instr modifying a reg is treated as kill+def.
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
+ MI->getOperand(defIdx).getReg() == Reg)
+ isKill = true;
+ }
+ if (isKill) {
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else {
+ assert(MO->isDef());
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+ }
+
+ // Check register classes.
+ if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+ const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned sr = Reg;
+ if (SubIdx) {
+ unsigned s = TRI->getSubReg(Reg, SubIdx);
+ if (!s) {
+ report("Invalid subregister index for physical register",
+ MO, MONum);
+ return;
+ }
+ sr = s;
+ }
+ if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+ if (!DRC->contains(sr)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(sr) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ return;
+ }
+ RC = *(RC->subregclasses_begin()+SubIdx);
+ }
+ if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+ if (RC != DRC && !RC->hasSuperClass(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled);
+ regsKilled.clear();
+
+ // Verify that both <def> and <def,dead> operands refer to dead registers.
+ RegVector defs(regsDefined);
+ defs.append(regsDead.begin(), regsDead.end());
+
+ for (RegVector::const_iterator I = defs.begin(), E = defs.end();
+ I != E; ++I) {
+ if (regsLive.count(*I)) {
+ if (TargetRegisterInfo::isPhysicalRegister(*I)) {
+ if (!allowPhysDoubleDefs && !isReserved(*I) &&
+ !regsLiveInButUnused.count(*I)) {
+ report("Redefining a live physical register", MI);
+ *OS << "Register " << TRI->getName(*I)
+ << " was defined but already live.\n";
+ }
+ } else {
+ if (!allowVirtDoubleDefs) {
+ report("Redefining a live virtual register", MI);
+ *OS << "Virtual register %reg" << *I
+ << " was defined but already live.\n";
+ }
+ }
+ } else if (TargetRegisterInfo::isVirtualRegister(*I) &&
+ !MInfo.regsKilled.count(*I)) {
+ // Virtual register defined without being killed first must be dead on
+ // entry.
+ MInfo.vregsDeadIn.insert(std::make_pair(*I, MI));
+ }
+ }
+
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
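+//
+// For example (a sketch): a vreg live-out of BB0 whose only use is in BB3
+// ends up in vregsPassed of the intervening blocks BB1 and BB2, but not of
+// BB3, where it is killed.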
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
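+//
+// For example (a sketch): a vreg used in BB3 before any local def lands in
+// BB3's vregsLiveIn, and is then propagated backwards into vregsRequired of
+// BB3's predecessors, stopping at blocks whose regsLiveOut already provide
+// a def.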
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
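+//
+// A machine PHI has the form (illustrative):
+//   %dst = PHI %src1, <BB#1>, %src2, <BB#2>, ...
+// i.e. operand 0 is the def and the remaining operands come in
+// (register, predecessor MBB) pairs, which is why the loop below steps i by
+// two starting from one.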
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+
+ // Verify dead-in virtual registers.
+ if (!allowVirtDoubleDefs) {
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
+
+ for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
+ E = MInfo.vregsDeadIn.end(); I != E; ++I) {
+ // A DeadIn register must be in neither regsLiveOut nor vregsPassed of
+ // any predecessor.
+ if (PrInfo.isLiveOut(I->first)) {
+ report("Live-in virtual register redefined", I->second);
+ *OS << "Register %reg" << I->first
+ << " was live-out from predecessor MBB #"
+ << (*PrI)->getNumber() << ".\n";
+ }
+ }
+ }
+ }
+ }
+
+ // Now check LiveVariables info if available
+ if (LiveVars) {
+ calcRegsRequired();
+ verifyLiveVariables();
+ }
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+ RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register %reg" << Reg
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register %reg" << Reg
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..4ab3e3c
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG AsmPrinter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 0000000..cf05275
--- /dev/null
+++ b/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+// ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+ BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+ BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+ BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+ BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+ BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+ uint8_t fill /* 0 */) {
+ BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
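+/// For example, the value 624485 encodes as the ULEB128 byte sequence
+/// 0xE5 0x8E 0x26.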
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+ BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
+ BO->emitSLEB128Bytes(Value);
+}
+
+/// emitString - This callback is invoked when a String needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitString(const std::string &String) {
+ BO->emitString(String);
+}
+
+/// getCurrentPCValue - This returns the address that the next emitted byte
+/// will be output to.
+uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// getCurrentPCOffset - Return the offset from the start of the emitted
+/// buffer that we are currently writing to.
+uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// addRelocation - Whenever a relocatable address is needed, it should be
+/// noted with this interface.
+void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
+ BO->addRelocation(relocation);
+}
+
+/// StartMachineBasicBlock - This should be called by the target when a new
+/// basic block is about to be emitted. This way the MCE knows where the
+/// start of the block is, and can implement getMachineBasicBlockAddress.
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
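+ // Grow to twice the required size, presumably to amortize the cost of
+ // repeated resizing as blocks are emitted in order.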
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+}
+
+/// getMachineBasicBlockAddress - Return the address of the specified
+/// MachineBasicBlock, only usable after the label for the MBB has been
+/// emitted.
+uintptr_t
+ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+}
+
+/// getJumpTableEntryAddress - Return the address of the jump table with index
+/// 'Index' in the function that last called initJumpTableInfo.
+uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+}
+
+/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+}
+
+/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
+ assert(CPSections.size() > Index && "CP not emitted!");
+ return CPSections[Index];
+}
+
+} // end namespace llvm
+
diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 0000000..48db200
--- /dev/null
+++ b/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
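+// Functions opt in to this strategy via the IR gc attribute, e.g.
+// (illustrative):
+//
+//   define void @f() gc "ocaml" { ... }
+//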
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
new file mode 100644
index 0000000..acb6869
--- /dev/null
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -0,0 +1,197 @@
+//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs optimization of sign / zero extension instructions. It
+// may be extended to handle other instructions of similar property.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This pass will
+// replace (some) uses of the pre-extension value with uses of the sub-register
+// of the results.
+//
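+// For example (illustrative pseudo machine code; the opcode and sub-register
+// names are made up):
+//
+//   %r1 = SEXT16to32 %r0    ; %r0 survives in the low 16 bits of %r1
+//   ...
+//   use %r0                 ; can be rewritten as:
+//
+//   %r2 = EXTRACT_SUBREG %r1, <low 16 bits>
+//   use %r2
+//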
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+
+namespace {
+ class OptimizeExts : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ OptimizeExts() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ };
+}
+
+char OptimizeExts::ID = 0;
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");
+
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+/// OptimizeInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register, writes a single register, and does not modify
+/// the source, and if the source value is preserved as a sub-register of
+/// the result, then replace all reachable uses of the source with the
+/// sub-register of the result.
+bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ bool Changed = false;
+ LocalMIs.insert(MI);
+
+ unsigned SrcReg, DstReg, SubIdx;
+ if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+ if (++UI == MRI->use_end())
+ // No other uses.
+ return false;
+
+ // Ok, the source has other uses. See if we can replace the other uses
+ // with use of the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_begin(DstReg);
+ for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+ ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ bool ExtendLife = true;
+ // Uses that are in the same BB of uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ UI = MRI->use_begin(SrcReg);
+ for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+ ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB))
+ // Non-local uses where the result of extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ else if (Aggressive && DT->dominates(MBB, UseMBB))
+ // We may want to extend live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ else {
+ // Both will be live out of the def MBB anyway. Don't extend live
+ // range of the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Ok, we'll extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ UI = MRI->use_begin(DstReg);
+ for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+ ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR)
+ .addReg(DstReg).addImm(SubIdx);
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ LocalMIs.clear();
+ for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
+ ++MII) {
+ MachineInstr *MI = &*MII;
+ Changed |= OptimizeInstr(MI, MBB, LocalMIs);
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h
new file mode 100644
index 0000000..b2224cb
--- /dev/null
+++ b/lib/CodeGen/PBQP/Graph.h
@@ -0,0 +1,425 @@
+//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Graph class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
+#define LLVM_CODEGEN_PBQP_GRAPH_H
+
+#include "Math.h"
+
+#include <list>
+#include <vector>
+#include <map>
+
+namespace PBQP {
+
+ /// PBQP Graph class.
+ /// Instances of this class describe PBQP problems.
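+ ///
+ /// A small usage sketch (this assumes the Vector and Matrix constructors
+ /// from Math.h that take an initial fill value, and Matrix's row
+ /// operator[]):
+ ///
+ ///   Graph g;
+ ///   Graph::NodeItr n1 = g.addNode(Vector(2, 0)); // node with 2 options
+ ///   Graph::NodeItr n2 = g.addNode(Vector(2, 0));
+ ///   Matrix m(2, 2, 0);
+ ///   m[0][0] = 3;                // penalize selecting option 0 for both
+ ///   g.addEdge(n1, n2, m);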
+ class Graph {
+ private:
+
+ // ----- TYPEDEFS -----
+ class NodeEntry;
+ class EdgeEntry;
+
+ typedef std::list<NodeEntry> NodeList;
+ typedef std::list<EdgeEntry> EdgeList;
+
+ public:
+
+ typedef NodeList::iterator NodeItr;
+ typedef NodeList::const_iterator ConstNodeItr;
+
+ typedef EdgeList::iterator EdgeItr;
+ typedef EdgeList::const_iterator ConstEdgeItr;
+
+ private:
+
+ typedef std::list<EdgeItr> AdjEdgeList;
+
+ public:
+
+ typedef AdjEdgeList::iterator AdjEdgeItr;
+
+ private:
+
+ class NodeEntry {
+ private:
+ Vector costs;
+ AdjEdgeList adjEdges;
+ unsigned degree;
+ void *data;
+ public:
+ NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
+ Vector& getCosts() { return costs; }
+ const Vector& getCosts() const { return costs; }
+ unsigned getDegree() const { return degree; }
+ AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
+ AdjEdgeItr edgesEnd() { return adjEdges.end(); }
+ AdjEdgeItr addEdge(EdgeItr e) {
+ ++degree;
+ return adjEdges.insert(adjEdges.end(), e);
+ }
+ void removeEdge(AdjEdgeItr ae) {
+ --degree;
+ adjEdges.erase(ae);
+ }
+ void setData(void *data) { this->data = data; }
+ void* getData() { return data; }
+ };
+
+ class EdgeEntry {
+ private:
+ NodeItr node1, node2;
+ Matrix costs;
+ AdjEdgeItr node1AEItr, node2AEItr;
+ void *data;
+ public:
+ EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
+ : node1(node1), node2(node2), costs(costs) {}
+ NodeItr getNode1() const { return node1; }
+ NodeItr getNode2() const { return node2; }
+ Matrix& getCosts() { return costs; }
+ const Matrix& getCosts() const { return costs; }
+ void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
+ AdjEdgeItr getNode1AEItr() { return node1AEItr; }
+ void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
+ AdjEdgeItr getNode2AEItr() { return node2AEItr; }
+ void setData(void *data) { this->data = data; }
+ void *getData() { return data; }
+ };
+
+ // ----- MEMBERS -----
+
+ NodeList nodes;
+ unsigned numNodes;
+
+ EdgeList edges;
+ unsigned numEdges;
+
+ // ----- INTERNAL METHODS -----
+
+ NodeEntry& getNode(NodeItr nItr) { return *nItr; }
+ const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
+
+ EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
+ const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
+
+ NodeItr addConstructedNode(const NodeEntry &n) {
+ ++numNodes;
+ return nodes.insert(nodes.end(), n);
+ }
+
+ EdgeItr addConstructedEdge(const EdgeEntry &e) {
+ assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
+ "Attempt to add duplicate edge.");
+ ++numEdges;
+ EdgeItr edgeItr = edges.insert(edges.end(), e);
+ EdgeEntry &ne = getEdge(edgeItr);
+ NodeEntry &n1 = getNode(ne.getNode1());
+ NodeEntry &n2 = getNode(ne.getNode2());
+ // Sanity check on matrix dimensions:
+ assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
+ (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
+ "Edge cost dimensions do not match node costs dimensions.");
+ ne.setNode1AEItr(n1.addEdge(edgeItr));
+ ne.setNode2AEItr(n2.addEdge(edgeItr));
+ return edgeItr;
+ }
+
+ inline void copyFrom(const Graph &other);
+ public:
+
+ /// \brief Construct an empty PBQP graph.
+ Graph() : numNodes(0), numEdges(0) {}
+
+ /// \brief Copy construct this graph from "other". Note: Does not copy node
+ /// and edge data, only graph structure and costs.
+ /// @param other Source graph to copy from.
+ Graph(const Graph &other) : numNodes(0), numEdges(0) {
+ copyFrom(other);
+ }
+
+ /// \brief Make this graph a copy of "other". Note: Does not copy node and
+ /// edge data, only graph structure and costs.
+ /// @param other The graph to copy from.
+ /// @return A reference to this graph.
+ ///
+ /// This will clear the current graph, erasing any nodes and edges added,
+ /// before copying from other.
+ Graph& operator=(const Graph &other) {
+ clear();
+ copyFrom(other);
+ return *this;
+ }
+
+ /// \brief Add a node with the given costs.
+ /// @param costs Cost vector for the new node.
+ /// @return Node iterator for the added node.
+ NodeItr addNode(const Vector &costs) {
+ return addConstructedNode(NodeEntry(costs));
+ }
+
+ /// \brief Add an edge between the given nodes with the given costs.
+ /// @param n1Itr First node.
+ /// @param n2Itr Second node.
+ /// @return Edge iterator for the added edge.
+ EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr,
+ const Matrix &costs) {
+ assert(getNodeCosts(n1Itr).getLength() == costs.getRows() &&
+ getNodeCosts(n2Itr).getLength() == costs.getCols() &&
+ "Matrix dimensions mismatch.");
+ return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs));
+ }
+
+ /// \brief Get the number of nodes in the graph.
+ /// @return Number of nodes in the graph.
+ unsigned getNumNodes() const { return numNodes; }
+
+ /// \brief Get the number of edges in the graph.
+ /// @return Number of edges in the graph.
+ unsigned getNumEdges() const { return numEdges; }
+
+ /// \brief Get a node's cost vector.
+ /// @param nItr Node iterator.
+ /// @return Node cost vector.
+ Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); }
+
+ /// \brief Get a node's cost vector (const version).
+ /// @param nItr Node iterator.
+ /// @return Node cost vector.
+ const Vector& getNodeCosts(ConstNodeItr nItr) const {
+ return getNode(nItr).getCosts();
+ }
+
+ /// \brief Set a node's data pointer.
+ /// @param nItr Node iterator.
+ /// @param data Pointer to node data.
+ ///
+ /// Typically used by a PBQP solver to attach data to aid in solution.
+ void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); }
+
+ /// \brief Get the node's data pointer.
+ /// @param nItr Node iterator.
+ /// @return Pointer to node data.
+ void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); }
+
+ /// \brief Get an edge's cost matrix.
+ /// @param eItr Edge iterator.
+ /// @return Edge cost matrix.
+ Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); }
+
+ /// \brief Get an edge's cost matrix (const version).
+ /// @param eItr Edge iterator.
+ /// @return Edge cost matrix.
+ const Matrix& getEdgeCosts(ConstEdgeItr eItr) const {
+ return getEdge(eItr).getCosts();
+ }
+
+ /// \brief Set an edge's data pointer.
+ /// @param eItr Edge iterator.
+ /// @param data Pointer to edge data.
+ ///
+ /// Typically used by a PBQP solver to attach data to aid in solution.
+ void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); }
+
+ /// \brief Get an edge's data pointer.
+ /// @param eItr Edge iterator.
+ /// @return Pointer to edge data.
+ void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); }
+
+ /// \brief Get a node's degree.
+ /// @param nItr Node iterator.
+ /// @return The degree of the node.
+ unsigned getNodeDegree(NodeItr nItr) const {
+ return getNode(nItr).getDegree();
+ }
+
+ /// \brief Begin iterator for node set.
+ NodeItr nodesBegin() { return nodes.begin(); }
+
+ /// \brief Begin const iterator for node set.
+ ConstNodeItr nodesBegin() const { return nodes.begin(); }
+
+ /// \brief End iterator for node set.
+ NodeItr nodesEnd() { return nodes.end(); }
+
+ /// \brief End const iterator for node set.
+ ConstNodeItr nodesEnd() const { return nodes.end(); }
+
+ /// \brief Begin iterator for edge set.
+ EdgeItr edgesBegin() { return edges.begin(); }
+
+ /// \brief End iterator for edge set.
+ EdgeItr edgesEnd() { return edges.end(); }
+
+ /// \brief Get begin iterator for adjacent edge set.
+ /// @param nItr Node iterator.
+ /// @return Begin iterator for the set of edges connected to the given node.
+ AdjEdgeItr adjEdgesBegin(NodeItr nItr) {
+ return getNode(nItr).edgesBegin();
+ }
+
+ /// \brief Get end iterator for adjacent edge set.
+ /// @param nItr Node iterator.
+ /// @return End iterator for the set of edges connected to the given node.
+ AdjEdgeItr adjEdgesEnd(NodeItr nItr) {
+ return getNode(nItr).edgesEnd();
+ }
+
+ /// \brief Get the first node connected to this edge.
+ /// @param eItr Edge iterator.
+ /// @return The first node connected to the given edge.
+ NodeItr getEdgeNode1(EdgeItr eItr) {
+ return getEdge(eItr).getNode1();
+ }
+
+ /// \brief Get the second node connected to this edge.
+ /// @param eItr Edge iterator.
+ /// @return The second node connected to the given edge.
+ NodeItr getEdgeNode2(EdgeItr eItr) {
+ return getEdge(eItr).getNode2();
+ }
+
+ /// \brief Get the "other" node connected to this edge.
+ /// @param eItr Edge iterator.
+ /// @param nItr Node iterator for the "given" node.
+ /// @return The iterator for the "other" node connected to this edge.
+ NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) {
+ EdgeEntry &e = getEdge(eItr);
+ if (e.getNode1() == nItr) {
+ return e.getNode2();
+ } // else
+ return e.getNode1();
+ }
+
+ /// \brief Get the edge connecting two nodes.
+ /// @param n1Itr First node iterator.
+ /// @param n2Itr Second node iterator.
+ /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists,
+ /// otherwise returns edgesEnd().
+ EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
+ for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
+ aeItr != aeEnd; ++aeItr) {
+ if ((getEdgeNode1(*aeItr) == n2Itr) ||
+ (getEdgeNode2(*aeItr) == n2Itr)) {
+ return *aeItr;
+ }
+ }
+ return edges.end();
+ }
+
+ /// \brief Remove a node from the graph.
+ /// @param nItr Node iterator.
+ void removeNode(NodeItr nItr) {
+ NodeEntry &n = getNode(nItr);
+ for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
+ EdgeItr eItr = *itr;
+ ++itr;
+ removeEdge(eItr);
+ }
+ nodes.erase(nItr);
+ --numNodes;
+ }
+
+ /// \brief Remove an edge from the graph.
+ /// @param eItr Edge iterator.
+ void removeEdge(EdgeItr eItr) {
+ EdgeEntry &e = getEdge(eItr);
+ NodeEntry &n1 = getNode(e.getNode1());
+ NodeEntry &n2 = getNode(e.getNode2());
+ n1.removeEdge(e.getNode1AEItr());
+ n2.removeEdge(e.getNode2AEItr());
+ edges.erase(eItr);
+ --numEdges;
+ }
+
+ /// \brief Remove all nodes and edges from the graph.
+ void clear() {
+ nodes.clear();
+ edges.clear();
+ numNodes = numEdges = 0;
+ }
+
+ /// \brief Print a representation of this graph in DOT format.
+ /// @param os Output stream to print on.
+ template <typename OStream>
+ void printDot(OStream &os) {
+
+ os << "graph {\n";
+
+ for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+
+ os << " node" << nodeItr << " [ label=\""
+ << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+ }
+
+ os << " edge [ len=" << getNumNodes() << " ]\n";
+
+ for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+
+ os << " node" << getEdgeNode1(edgeItr)
+ << " -- node" << getEdgeNode2(edgeItr)
+ << " [ label=\"";
+
+ const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+ for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+ os << edgeCosts.getRowAsVector(i) << "\\n";
+ }
+ os << "\" ]\n";
+ }
+ os << "}\n";
+ }
+
+ };
+
+ class NodeItrComparator {
+ public:
+ bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+ return &*n1 < &*n2;
+ }
+
+ bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+ return &*n1 < &*n2;
+ }
+ };
+
+ class EdgeItrCompartor {
+ public:
+ bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+ return &*e1 < &*e2;
+ }
+
+ bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+ return &*e1 < &*e2;
+ }
+ };
+
+ void Graph::copyFrom(const Graph &other) {
+ std::map<Graph::ConstNodeItr, Graph::NodeItr,
+ NodeItrComparator> nodeMap;
+
+ for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+ nEnd = other.nodesEnd();
+ nItr != nEnd; ++nItr) {
+ nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+ }
+
+ // Copy the edges, mapping each endpoint through nodeMap so the new
+ // edges connect the corresponding nodes in this graph. (Assumes const
+ // edge accessors mirroring the const node accessors used above.)
+ for (Graph::ConstEdgeItr eItr = other.edgesBegin(),
+ eEnd = other.edgesEnd();
+ eItr != eEnd; ++eItr) {
+ addEdge(nodeMap[other.getEdgeNode1(eItr)],
+ nodeMap[other.getEdgeNode2(eItr)],
+ other.getEdgeCosts(eItr));
+ }
+ }
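+
+ // An illustrative sketch of typical Graph usage (hypothetical values, for
+ // documentation only): build a two-node problem with three options per
+ // node and connect the nodes with a zero cost matrix.
+ //
+ // Graph g;
+ // Graph::NodeItr n1 = g.addNode(Vector(3, 0));
+ // Graph::NodeItr n2 = g.addNode(Vector(3, 0));
+ // Graph::EdgeItr e = g.addEdge(n1, n2, Matrix(3, 3, 0));
+ // assert(g.findEdge(n1, n2) == e && "Edge lookup failed.");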
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 0000000..3bb24e1
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,242 @@
+//===-- HeuristicBase.h --- Heuristic base class for PBQP -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+ /// \brief Abstract base class for heuristic implementations.
+ ///
+ /// This class provides a handy base for heuristic implementations with common
+ /// solver behaviour implemented for a number of methods.
+ ///
+ /// To implement your own heuristic using this class as a base you'll have to
+ /// implement, as a minimum, the following methods:
+ /// <ul>
+ /// <li> void addToHeuristicReduceList(Graph::NodeItr) : Add a node to the
+ /// heuristic reduction list.
+ /// <li> bool heuristicReduce() : Perform a single heuristic reduction.
+ /// <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+ /// change to the cost matrix on the given edge (by R2).
+ /// <li> void postUpdateEdgeCosts(Graph::EdgeItr) : Handle the new
+ /// costs on the given edge.
+ /// <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+ /// edge into the PBQP graph (by R2).
+ /// <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+ /// disconnection of the given edge from the given node.
+ /// <li> A constructor for your derived class : to pass back a reference to
+ /// the solver which is using this heuristic.
+ /// </ul>
+ ///
+ /// These methods are implemented in this class for documentation purposes,
+ /// but will assert if called.
+ ///
+ /// Note that this class uses the curiously recurring template pattern to
+ /// forward calls to the derived class. These methods need not be made
+ /// virtual, and indeed probably shouldn't be, for performance reasons.
+ ///
+ /// You'll also need to provide NodeData and EdgeData structs in your class.
+ /// These can be used to attach data relevant to your heuristic to each
+ /// node/edge in the PBQP graph.
+
+ template <typename HImpl>
+ class HeuristicBase {
+ private:
+
+ typedef std::list<Graph::NodeItr> OptimalList;
+
+ HeuristicSolverImpl<HImpl> &s;
+ Graph &g;
+ OptimalList optimalList;
+
+ // Return a reference to the derived heuristic.
+ HImpl& impl() { return static_cast<HImpl&>(*this); }
+
+ // Add the given node to the optimal reductions list.
+ void addToOptimalReductionList(Graph::NodeItr nItr) {
+ optimalList.insert(optimalList.end(), nItr);
+ }
+
+ public:
+
+ /// \brief Construct an instance with a reference to the given solver.
+ /// @param solver The solver which is using this heuristic instance.
+ HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+ : s(solver), g(s.getGraph()) { }
+
+ /// \brief Get the solver which is using this heuristic instance.
+ /// @return The solver which is using this heuristic instance.
+ ///
+ /// You can use this method to get access to the solver in your derived
+ /// heuristic implementation.
+ HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+ /// \brief Get the graph representing the problem to be solved.
+ /// @return The graph representing the problem to be solved.
+ Graph& getGraph() { return g; }
+
+ /// \brief Tell the solver to simplify the graph before the reduction phase.
+ /// @return Whether or not the solver should run a simplification phase
+ /// prior to the main setup and reduction.
+ ///
+ /// HeuristicBase returns true from this method as a sensible default;
+ /// you can override it in your derived class if you want different
+ /// behaviour.
+ bool solverRunSimplify() const { return true; }
+
+ /// \brief Decide whether a node should be optimally or heuristically
+ /// reduced.
+ /// @return Whether or not the given node should be listed for optimal
+ /// reduction (via R0, R1 or R2).
+ ///
+ /// HeuristicBase returns true for any node with degree less than 3. This is
+ /// sane and sensible for many situations, but not all. You can override
+ /// this method in your derived class if you want a different selection
+ /// criterion. Note however that your criterion for selecting optimal nodes
+ /// should be <i>at least</i> as strong as this one, i.e. nodes of degree 3
+ /// or higher should never be selected for optimal reduction.
+ bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+ if (g.getNodeDegree(nItr) < 3)
+ return true;
+ // else
+ return false;
+ }
+
+ /// \brief Add the given node to the list of nodes to be optimally reduced.
+ /// @param nItr Node iterator to be added.
+ ///
+ /// You probably don't want to over-ride this, except perhaps to record
+ /// statistics before calling this implementation. HeuristicBase relies on
+ /// its behaviour.
+ void addToOptimalReduceList(Graph::NodeItr nItr) {
+ optimalList.push_back(nItr);
+ }
+
+ /// \brief Initialise the heuristic.
+ ///
+ /// HeuristicBase iterates over all nodes in the problem and adds them to
+ /// the appropriate list using addToOptimalReduceList or
+ /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+ ///
+ /// This behaviour should be fine for most situations.
+ void setup() {
+ for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+ nItr != nEnd; ++nItr) {
+ if (impl().shouldOptimallyReduce(nItr)) {
+ addToOptimalReduceList(nItr);
+ } else {
+ impl().addToHeuristicReduceList(nItr);
+ }
+ }
+ }
+
+ /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+ /// @return True if a reduction takes place, false if the optimal reduce
+ /// list is empty.
+ ///
+ /// Selects a node from the optimal reduce list and removes it, applying
+ /// R0, R1 or R2 as appropriate based on the selected node's degree.
+ bool optimalReduce() {
+ if (optimalList.empty())
+ return false;
+
+ Graph::NodeItr nItr = optimalList.front();
+ optimalList.pop_front();
+
+ switch (s.getSolverDegree(nItr)) {
+ case 0: s.applyR0(nItr); break;
+ case 1: s.applyR1(nItr); break;
+ case 2: s.applyR2(nItr); break;
+ default: assert(false &&
+ "Optimal reduction of degree > 2 nodes is invalid.");
+ }
+
+ return true;
+ }
+
+ /// \brief Perform the PBQP reduction process.
+ ///
+ /// Reduces the problem to the empty graph by repeated application of the
+ /// reduction rules R0, R1, R2 and RN.
+ /// R0, R1 or R2 are always applied if possible before RN is used.
+ void reduce() {
+ bool finished = false;
+
+ while (!finished) {
+ if (!optimalReduce())
+ if (!impl().heuristicReduce())
+ finished = true;
+ }
+ }
+
+ /// \brief Add a node to the heuristic reduce list.
+ /// @param nItr Node iterator to add to the heuristic reduce list.
+ void addToHeuristicReduceList(Graph::NodeItr nItr) {
+ assert(false && "Must be implemented in derived class.");
+ }
+
+ /// \brief Heuristically reduce one of the nodes in the heuristic
+ /// reduce list.
+ /// @return True if a reduction takes place, false if the heuristic reduce
+ /// list is empty.
+ bool heuristicReduce() {
+ assert(false && "Must be implemented in derived class.");
+ return false;
+ }
+
+ /// \brief Prepare a change in the costs on the given edge.
+ /// @param eItr Edge iterator.
+ void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+ assert(false && "Must be implemented in derived class.");
+ }
+
+ /// \brief Handle the change in the costs on the given edge.
+ /// @param eItr Edge iterator.
+ void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+ assert(false && "Must be implemented in derived class.");
+ }
+
+ /// \brief Handle the addition of a new edge into the PBQP graph.
+ /// @param eItr Edge iterator for the added edge.
+ void handleAddEdge(Graph::EdgeItr eItr) {
+ assert(false && "Must be implemented in derived class.");
+ }
+
+ /// \brief Handle disconnection of an edge from a node.
+ /// @param eItr Edge iterator for edge being disconnected.
+ /// @param nItr Node iterator for the node being disconnected from.
+ ///
+ /// Edges are frequently removed due to the removal of a node. This
+ /// method allows for the effect to be computed only for the remaining
+ /// node in the graph.
+ void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+ assert(false && "Must be implemented in derived class.");
+ }
+
+ /// \brief Clean up any structures used by HeuristicBase.
+ ///
+ /// At present this just performs a sanity check: that the optimal reduce
+ /// list is empty now that reduction has completed.
+ ///
+ /// If your derived class has more complex structures which need tearing
+ /// down you should over-ride this method but include a call back to this
+ /// implementation.
+ void cleanup() {
+ assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+ }
+
+ };
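+
+ // An illustrative sketch (hypothetical, for documentation only) of a
+ // minimal heuristic derived from HeuristicBase. It reduces nodes in FIFO
+ // order and ignores cost updates, which satisfies the interface but will
+ // generally give poor solutions:
+ //
+ // class FIFOHeuristic : public HeuristicBase<FIFOHeuristic> {
+ // public:
+ // struct NodeData {};
+ // struct EdgeData {};
+ //
+ // FIFOHeuristic(HeuristicSolverImpl<FIFOHeuristic> &s)
+ // : HeuristicBase<FIFOHeuristic>(s) {}
+ //
+ // void addToHeuristicReduceList(Graph::NodeItr nItr) {
+ // rnList.push_back(nItr);
+ // }
+ //
+ // bool heuristicReduce() {
+ // if (rnList.empty())
+ // return false;
+ // Graph::NodeItr nItr = rnList.front();
+ // rnList.pop_front();
+ // // Disconnect remaining solver edges, then push the node.
+ // std::vector<Graph::EdgeItr> es(getSolver().solverEdgesBegin(nItr),
+ // getSolver().solverEdgesEnd(nItr));
+ // for (unsigned i = 0; i < es.size(); ++i) {
+ // handleRemoveEdge(es[i], getGraph().getEdgeOtherNode(es[i], nItr));
+ // getSolver().removeSolverEdge(es[i]);
+ // }
+ // getSolver().pushToStack(nItr);
+ // return true;
+ // }
+ //
+ // void preUpdateEdgeCosts(Graph::EdgeItr) {}
+ // void postUpdateEdgeCosts(Graph::EdgeItr) {}
+ // void handleAddEdge(Graph::EdgeItr) {}
+ // void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) {}
+ //
+ // private:
+ // std::list<Graph::NodeItr> rnList;
+ // };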
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 0000000..b48f548
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,592 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions for
+// nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable heuristic is
+// used to select a node for reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Graph.h"
+#include "Solution.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+#include <limits>
+
+namespace PBQP {
+
+ /// \brief Heuristic PBQP solver implementation.
+ ///
+ /// This class should usually be created (and destroyed) indirectly via a call
+ /// to HeuristicSolver<HImpl>::solve(Graph&).
+ /// See the comments for HeuristicSolver.
+ ///
+ /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
+ /// backpropagation phase, and maintains the internal copy of the graph on
+ /// which the reduction is carried out (the original being kept to facilitate
+ /// backpropagation).
+ template <typename HImpl>
+ class HeuristicSolverImpl {
+ private:
+
+ typedef typename HImpl::NodeData HeuristicNodeData;
+ typedef typename HImpl::EdgeData HeuristicEdgeData;
+
+ typedef std::list<Graph::EdgeItr> SolverEdges;
+
+ public:
+
+ /// \brief Iterator type for edges in the solver graph.
+ typedef SolverEdges::iterator SolverEdgeItr;
+
+ private:
+
+ class NodeData {
+ public:
+ NodeData() : solverDegree(0) {}
+
+ HeuristicNodeData& getHeuristicData() { return hData; }
+
+ SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
+ ++solverDegree;
+ return solverEdges.insert(solverEdges.end(), eItr);
+ }
+
+ void removeSolverEdge(SolverEdgeItr seItr) {
+ --solverDegree;
+ solverEdges.erase(seItr);
+ }
+
+ SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
+ SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
+ unsigned getSolverDegree() const { return solverDegree; }
+ void clearSolverEdges() {
+ solverDegree = 0;
+ solverEdges.clear();
+ }
+
+ private:
+ HeuristicNodeData hData;
+ unsigned solverDegree;
+ SolverEdges solverEdges;
+ };
+
+ class EdgeData {
+ public:
+ HeuristicEdgeData& getHeuristicData() { return hData; }
+
+ void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
+ this->n1SolverEdgeItr = n1SolverEdgeItr;
+ }
+
+ SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
+
+ void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){
+ this->n2SolverEdgeItr = n2SolverEdgeItr;
+ }
+
+ SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
+
+ private:
+
+ HeuristicEdgeData hData;
+ SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
+ };
+
+ Graph &g;
+ HImpl h;
+ Solution s;
+ std::vector<Graph::NodeItr> stack;
+
+ typedef std::list<NodeData> NodeDataList;
+ NodeDataList nodeDataList;
+
+ typedef std::list<EdgeData> EdgeDataList;
+ EdgeDataList edgeDataList;
+
+ public:
+
+ /// \brief Construct a heuristic solver implementation to solve the given
+ /// graph.
+ /// @param g The graph representing the problem instance to be solved.
+ HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
+
+ /// \brief Get the graph being solved by this solver.
+ /// @return The graph representing the problem instance being solved by this
+ /// solver.
+ Graph& getGraph() { return g; }
+
+ /// \brief Get the heuristic data attached to the given node.
+ /// @param nItr Node iterator.
+ /// @return The heuristic data attached to the given node.
+ HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+ return getSolverNodeData(nItr).getHeuristicData();
+ }
+
+ /// \brief Get the heuristic data attached to the given edge.
+ /// @param eItr Edge iterator.
+ /// @return The heuristic data attached to the given edge.
+ HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+ return getSolverEdgeData(eItr).getHeuristicData();
+ }
+
+ /// \brief Begin iterator for the set of edges adjacent to the given node in
+ /// the solver graph.
+ /// @param nItr Node iterator.
+ /// @return Begin iterator for the set of edges adjacent to the given node
+ /// in the solver graph.
+ SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+ return getSolverNodeData(nItr).solverEdgesBegin();
+ }
+
+ /// \brief End iterator for the set of edges adjacent to the given node in
+ /// the solver graph.
+ /// @param nItr Node iterator.
+ /// @return End iterator for the set of edges adjacent to the given node in
+ /// the solver graph.
+ SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+ return getSolverNodeData(nItr).solverEdgesEnd();
+ }
+
+ /// \brief Remove an edge from the solver graph.
+ /// @param eItr Edge iterator for edge to be removed.
+ ///
+ /// Does <i>not</i> notify the heuristic of the removal. That should be
+ /// done manually if necessary.
+ void removeSolverEdge(Graph::EdgeItr eItr) {
+ EdgeData &eData = getSolverEdgeData(eItr);
+ NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+ &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+ n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
+ n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
+ }
+
+ /// \brief Compute a solution to the PBQP problem instance with which this
+ /// heuristic solver was constructed.
+ /// @return A solution to the PBQP problem.
+ ///
+ /// Performs the full PBQP heuristic solver algorithm, including setup,
+ /// calls to the heuristic (which will call back to the reduction rules in
+ /// this class), and cleanup.
+ Solution computeSolution() {
+ setup();
+ h.setup();
+ h.reduce();
+ backpropagate();
+ h.cleanup();
+ cleanup();
+ return s;
+ }
+
+ /// \brief Add to the end of the stack.
+ /// @param nItr Node iterator to add to the reduction stack.
+ void pushToStack(Graph::NodeItr nItr) {
+ getSolverNodeData(nItr).clearSolverEdges();
+ stack.push_back(nItr);
+ }
+
+ /// \brief Returns the solver degree of the given node.
+ /// @param nItr Node iterator for which degree is requested.
+ /// @return Node degree in the <i>solver</i> graph (not the original graph).
+ unsigned getSolverDegree(Graph::NodeItr nItr) {
+ return getSolverNodeData(nItr).getSolverDegree();
+ }
+
+ /// \brief Set the solution of the given node.
+ /// @param nItr Node iterator to set solution for.
+ /// @param selection Selection for node.
+ void setSolution(Graph::NodeItr nItr, unsigned selection) {
+ s.setSelection(nItr, selection);
+
+ for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
+ aeEnd = g.adjEdgesEnd(nItr);
+ aeItr != aeEnd; ++aeItr) {
+ Graph::EdgeItr eItr(*aeItr);
+ Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr));
+ getSolverNodeData(anItr).addSolverEdge(eItr);
+ }
+ }
+
+ /// \brief Apply rule R0.
+ /// @param nItr Node iterator for node to apply R0 to.
+ ///
+ /// Node will be automatically pushed to the solver stack.
+ void applyR0(Graph::NodeItr nItr) {
+ assert(getSolverNodeData(nItr).getSolverDegree() == 0 &&
+ "R0 applied to node with degree != 0.");
+
+ // Nothing to do. Just push the node onto the reduction stack.
+ pushToStack(nItr);
+ }
+
+ /// \brief Apply rule R1.
+ /// @param xnItr Node iterator for node to apply R1 to.
+ ///
+ /// Node will be automatically pushed to the solver stack.
+ void applyR1(Graph::NodeItr xnItr) {
+ NodeData &nd = getSolverNodeData(xnItr);
+ assert(nd.getSolverDegree() == 1 &&
+ "R1 applied to node with degree != 1.");
+
+ Graph::EdgeItr eItr = *nd.solverEdgesBegin();
+
+ const Matrix &eCosts = g.getEdgeCosts(eItr);
+ const Vector &xCosts = g.getNodeCosts(xnItr);
+
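+ // R1 folds the degree-1 node x into its neighbour y:
+ // y[j] += min over i of (e[i][j] + x[i])
+ // so x can be removed now and solved later during back-propagation.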
+ // Duplicate a little to avoid transposing matrices.
+ if (xnItr == g.getEdgeNode1(eItr)) {
+ Graph::NodeItr ynItr = g.getEdgeNode2(eItr);
+ Vector &yCosts = g.getNodeCosts(ynItr);
+ for (unsigned j = 0; j < yCosts.getLength(); ++j) {
+ PBQPNum min = eCosts[0][j] + xCosts[0];
+ for (unsigned i = 1; i < xCosts.getLength(); ++i) {
+ PBQPNum c = eCosts[i][j] + xCosts[i];
+ if (c < min)
+ min = c;
+ }
+ yCosts[j] += min;
+ }
+ h.handleRemoveEdge(eItr, ynItr);
+ } else {
+ Graph::NodeItr ynItr = g.getEdgeNode1(eItr);
+ Vector &yCosts = g.getNodeCosts(ynItr);
+ for (unsigned i = 0; i < yCosts.getLength(); ++i) {
+ PBQPNum min = eCosts[i][0] + xCosts[0];
+ for (unsigned j = 1; j < xCosts.getLength(); ++j) {
+ PBQPNum c = eCosts[i][j] + xCosts[j];
+ if (c < min)
+ min = c;
+ }
+ yCosts[i] += min;
+ }
+ h.handleRemoveEdge(eItr, ynItr);
+ }
+ removeSolverEdge(eItr);
+ assert(nd.getSolverDegree() == 0 &&
+ "Degree 1 with edge removed should be 0.");
+ pushToStack(xnItr);
+ }
+
+ /// \brief Apply rule R2.
+ /// @param xnItr Node iterator for node to apply R2 to.
+ ///
+ /// Node will be automatically pushed to the solver stack.
+ void applyR2(Graph::NodeItr xnItr) {
+ assert(getSolverNodeData(xnItr).getSolverDegree() == 2 &&
+ "R2 applied to node with degree != 2.");
+
+ NodeData &nd = getSolverNodeData(xnItr);
+ const Vector &xCosts = g.getNodeCosts(xnItr);
+
+ SolverEdgeItr aeItr = nd.solverEdgesBegin();
+ Graph::EdgeItr yxeItr = *aeItr,
+ zxeItr = *(++aeItr);
+
+ Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr),
+ znItr = g.getEdgeOtherNode(zxeItr, xnItr);
+
+ bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr),
+ flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr);
+
+ const Matrix *yxeCosts = flipEdge1 ?
+ new Matrix(g.getEdgeCosts(yxeItr).transpose()) :
+ &g.getEdgeCosts(yxeItr);
+
+ const Matrix *zxeCosts = flipEdge2 ?
+ new Matrix(g.getEdgeCosts(zxeItr).transpose()) :
+ &g.getEdgeCosts(zxeItr);
+
+ unsigned xLen = xCosts.getLength(),
+ yLen = yxeCosts->getRows(),
+ zLen = zxeCosts->getRows();
+
+ Matrix delta(yLen, zLen);
+
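+ // R2 folds the degree-2 node x into a single edge between its
+ // neighbours y and z:
+ // delta[i][j] = min over k of (yxe[i][k] + zxe[j][k] + x[k])
+ // delta is then merged into any existing (y,z) edge or becomes a new one.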
+ for (unsigned i = 0; i < yLen; ++i) {
+ for (unsigned j = 0; j < zLen; ++j) {
+ PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0];
+ for (unsigned k = 1; k < xLen; ++k) {
+ PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k];
+ if (c < min) {
+ min = c;
+ }
+ }
+ delta[i][j] = min;
+ }
+ }
+
+ if (flipEdge1)
+ delete yxeCosts;
+
+ if (flipEdge2)
+ delete zxeCosts;
+
+ Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr);
+ bool addedEdge = false;
+
+ if (yzeItr == g.edgesEnd()) {
+ yzeItr = g.addEdge(ynItr, znItr, delta);
+ addedEdge = true;
+ } else {
+ Matrix &yzeCosts = g.getEdgeCosts(yzeItr);
+ h.preUpdateEdgeCosts(yzeItr);
+ if (ynItr == g.getEdgeNode1(yzeItr)) {
+ yzeCosts += delta;
+ } else {
+ yzeCosts += delta.transpose();
+ }
+ }
+
+ bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr);
+
+ if (!addedEdge) {
+ // If we modified the edge costs let the heuristic know.
+ h.postUpdateEdgeCosts(yzeItr);
+ }
+
+ if (nullCostEdge) {
+ // If this edge ended up null remove it.
+ if (!addedEdge) {
+ // We didn't just add it, so we need to notify the heuristic
+ // and remove it from the solver.
+ h.handleRemoveEdge(yzeItr, ynItr);
+ h.handleRemoveEdge(yzeItr, znItr);
+ removeSolverEdge(yzeItr);
+ }
+ g.removeEdge(yzeItr);
+ } else if (addedEdge) {
+ // If the edge was added, and non-null, finish setting it up, add it to
+ // the solver & notify heuristic.
+ edgeDataList.push_back(EdgeData());
+ g.setEdgeData(yzeItr, &edgeDataList.back());
+ addSolverEdge(yzeItr);
+ h.handleAddEdge(yzeItr);
+ }
+
+ h.handleRemoveEdge(yxeItr, ynItr);
+ removeSolverEdge(yxeItr);
+ h.handleRemoveEdge(zxeItr, znItr);
+ removeSolverEdge(zxeItr);
+
+ pushToStack(xnItr);
+ }
+
+ private:
+
+ NodeData& getSolverNodeData(Graph::NodeItr nItr) {
+ return *static_cast<NodeData*>(g.getNodeData(nItr));
+ }
+
+ EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
+ return *static_cast<EdgeData*>(g.getEdgeData(eItr));
+ }
+
+ void addSolverEdge(Graph::EdgeItr eItr) {
+ EdgeData &eData = getSolverEdgeData(eItr);
+ NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+ &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+ eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
+ eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
+ }
+
+ void setup() {
+ if (h.solverRunSimplify()) {
+ simplify();
+ }
+
+ // Create node data objects.
+ for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+ nItr != nEnd; ++nItr) {
+ nodeDataList.push_back(NodeData());
+ g.setNodeData(nItr, &nodeDataList.back());
+ }
+
+ // Create edge data objects.
+ for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+ eItr != eEnd; ++eItr) {
+ edgeDataList.push_back(EdgeData());
+ g.setEdgeData(eItr, &edgeDataList.back());
+ addSolverEdge(eItr);
+ }
+ }
+
+ void simplify() {
+ disconnectTrivialNodes();
+ eliminateIndependentEdges();
+ }
+
+ // Eliminate trivial nodes.
+ void disconnectTrivialNodes() {
+ unsigned numDisconnected = 0;
+
+ for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+ nItr != nEnd; ++nItr) {
+
+ if (g.getNodeCosts(nItr).getLength() == 1) {
+
+ std::vector<Graph::EdgeItr> edgesToRemove;
+
+ for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
+ aeEnd = g.adjEdgesEnd(nItr);
+ aeItr != aeEnd; ++aeItr) {
+
+ Graph::EdgeItr eItr = *aeItr;
+
+ if (g.getEdgeNode1(eItr) == nItr) {
+ Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
+ g.getNodeCosts(otherNodeItr) +=
+ g.getEdgeCosts(eItr).getRowAsVector(0);
+ }
+ else {
+ Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
+ g.getNodeCosts(otherNodeItr) +=
+ g.getEdgeCosts(eItr).getColAsVector(0);
+ }
+
+ edgesToRemove.push_back(eItr);
+ }
+
+ if (!edgesToRemove.empty())
+ ++numDisconnected;
+
+ while (!edgesToRemove.empty()) {
+ g.removeEdge(edgesToRemove.back());
+ edgesToRemove.pop_back();
+ }
+ }
+ }
+ }
+
+ void eliminateIndependentEdges() {
+ std::vector<Graph::EdgeItr> edgesToProcess;
+ unsigned numEliminated = 0;
+
+ for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+ eItr != eEnd; ++eItr) {
+ edgesToProcess.push_back(eItr);
+ }
+
+ while (!edgesToProcess.empty()) {
+ if (tryToEliminateEdge(edgesToProcess.back()))
+ ++numEliminated;
+ edgesToProcess.pop_back();
+ }
+ }
+
+ bool tryToEliminateEdge(Graph::EdgeItr eItr) {
+ if (tryNormaliseEdgeMatrix(eItr)) {
+ g.removeEdge(eItr);
+ return true;
+ }
+ return false;
+ }
+
+ bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
+
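+ // Push the minimum of each row, then of each column, of the edge matrix
+ // into the adjacent node's cost vector. Infinite minima are not
+ // subtracted (the row or column is simply zeroed). If the matrix ends up
+ // all zero the edge no longer constrains the solution and the caller may
+ // remove it.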
+ Matrix &edgeCosts = g.getEdgeCosts(eItr);
+ Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
+ &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
+
+ for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+ PBQPNum rowMin = edgeCosts.getRowMin(r);
+ uCosts[r] += rowMin;
+ if (rowMin != std::numeric_limits<PBQPNum>::infinity()) {
+ edgeCosts.subFromRow(r, rowMin);
+ }
+ else {
+ edgeCosts.setRow(r, 0);
+ }
+ }
+
+ for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+ PBQPNum colMin = edgeCosts.getColMin(c);
+ vCosts[c] += colMin;
+ if (colMin != std::numeric_limits<PBQPNum>::infinity()) {
+ edgeCosts.subFromCol(c, colMin);
+ }
+ else {
+ edgeCosts.setCol(c, 0);
+ }
+ }
+
+ return edgeCosts.isZero();
+ }
+
+ void backpropagate() {
+ while (!stack.empty()) {
+ computeSolution(stack.back());
+ stack.pop_back();
+ }
+ }
+
+ void computeSolution(Graph::NodeItr nItr) {
+
+ NodeData &nodeData = getSolverNodeData(nItr);
+
+ Vector v(g.getNodeCosts(nItr));
+
+ // Solve based on existing solved edges.
+ for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
+ solvedEdgeEnd = nodeData.solverEdgesEnd();
+ solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
+
+ Graph::EdgeItr eItr(*solvedEdgeItr);
+ Matrix &edgeCosts = g.getEdgeCosts(eItr);
+
+ if (nItr == g.getEdgeNode1(eItr)) {
+ Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
+ unsigned adjSolution = s.getSelection(adjNode);
+ v += edgeCosts.getColAsVector(adjSolution);
+ }
+ else {
+ Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
+ unsigned adjSolution = s.getSelection(adjNode);
+ v += edgeCosts.getRowAsVector(adjSolution);
+ }
+
+ }
+
+ setSolution(nItr, v.minIndex());
+ }
+
+ void cleanup() {
+ h.cleanup();
+ nodeDataList.clear();
+ edgeDataList.clear();
+ }
+ };
+
+ /// \brief PBQP heuristic solver class.
+ ///
+ /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
+ /// by calling
+ /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
+ ///
+ /// The choice of heuristic for the H parameter will affect both the solver
+ /// speed and solution quality. The heuristic should be chosen based on the
+ /// nature of the problem being solved.
+ /// Currently the only heuristic included with LLVM is the Briggs heuristic
+ /// for register allocation.
+ template <typename HImpl>
+ class HeuristicSolver {
+ public:
+ static Solution solve(Graph &g) {
+ HeuristicSolverImpl<HImpl> hs(g);
+ return hs.computeSolution();
+ }
+ };
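+
+ // A hypothetical end-to-end sketch (illustrative only): solve a populated
+ // graph with the Briggs heuristic and read back a node's selection.
+ //
+ // #include "Heuristics/Briggs.h"
+ //
+ // Graph g;
+ // Graph::NodeItr nItr = g.addNode(Vector(3, 0));
+ // Solution s = HeuristicSolver<Heuristics::Briggs>::solve(g);
+ // unsigned choice = s.getSelection(nItr);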
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
new file mode 100644
index 0000000..c09ad74
--- /dev/null
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -0,0 +1,472 @@
+//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the Briggs test for "allocability" of nodes in a
+// PBQP graph representing a register allocation problem. Nodes which can be
+// proven allocable (by a safe and relatively accurate test) are removed from
+// the PBQP graph first. If no provably allocable node is present in the graph
+// then the node with the minimal spill-cost to degree ratio is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+
+#include "llvm/Support/Compiler.h"
+#include "../HeuristicSolver.h"
+#include "../HeuristicBase.h"
+
+#include <set>
+#include <limits>
+
+namespace PBQP {
+ namespace Heuristics {
+
+ /// \brief PBQP Heuristic which applies an allocability test based on
+ /// Briggs.
+ ///
+ /// This heuristic assumes that the elements of cost vectors in the PBQP
+ /// problem represent storage options, with the first being the spill
+ /// option and subsequent elements representing legal registers for the
+ /// corresponding node. Edge cost matrices are likewise assumed to represent
+ /// register constraints.
+ /// If one or more nodes can be proven allocable by this heuristic (by
+ /// inspection of their constraint matrices) then the allocable node of
+ /// highest degree is selected for the next reduction and pushed to the
+ /// solver stack. If no nodes can be proven allocable then the node with
+ /// the lowest estimated spill cost is selected and push to the solver stack
+ /// instead.
+ ///
+ /// This implementation is built on top of HeuristicBase.
+ class Briggs : public HeuristicBase<Briggs> {
+ private:
+
+ class LinkDegreeComparator {
+ public:
+ LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
+ bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+ if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
+ return true;
+ if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr))
+ return false;
+ return (&*n1Itr < &*n2Itr);
+ }
+ private:
+ HeuristicSolverImpl<Briggs> *s;
+ };
+
+ class SpillCostComparator {
+ public:
+ SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
+ : s(&s), g(&s.getGraph()) {}
+ bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+ PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr),
+ cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
+ if (cost1 < cost2)
+ return true;
+ if (cost1 > cost2)
+ return false;
+ return (&*n1Itr < &*n2Itr);
+ }
+
+ private:
+ HeuristicSolverImpl<Briggs> *s;
+ Graph *g;
+ };
+
+ typedef std::list<Graph::NodeItr> RNAllocableList;
+ typedef RNAllocableList::iterator RNAllocableListItr;
+
+ typedef std::list<Graph::NodeItr> RNUnallocableList;
+ typedef RNUnallocableList::iterator RNUnallocableListItr;
+
+ public:
+
+ struct NodeData {
+ typedef std::vector<unsigned> UnsafeDegreesArray;
+ bool isHeuristic, isAllocable, isInitialized;
+ unsigned numDenied, numSafe;
+ UnsafeDegreesArray unsafeDegrees;
+ RNAllocableListItr rnaItr;
+ RNUnallocableListItr rnuItr;
+
+ NodeData()
+ : isHeuristic(false), isAllocable(false), isInitialized(false),
+ numDenied(0), numSafe(0) { }
+ };
+
+ struct EdgeData {
+ typedef std::vector<unsigned> UnsafeArray;
+ unsigned worst, reverseWorst;
+ UnsafeArray unsafe, reverseUnsafe;
+ bool isUpToDate;
+
+ EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
+ };
+
+ /// \brief Construct an instance of the Briggs heuristic.
+ /// @param solver A reference to the solver which is using this heuristic.
+ Briggs(HeuristicSolverImpl<Briggs> &solver) :
+ HeuristicBase<Briggs>(solver) {}
+
+ /// \brief Determine whether a node should be reduced using optimal
+ /// reduction.
+ /// @param nItr Node iterator to be considered.
+ /// @return True if the given node should be optimally reduced, false
+ /// otherwise.
+ ///
+ /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
+ /// exception. Nodes whose spill cost (element 0 of their cost vector) is
+ /// infinite are checked for allocability first. Allocable nodes may be
+ /// optimally reduced, but nodes whose allocability cannot be proven are
+ /// selected for heuristic reduction instead.
+ bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+ if (getSolver().getSolverDegree(nItr) < 3) {
+ if (getGraph().getNodeCosts(nItr)[0] !=
+ std::numeric_limits<PBQPNum>::infinity()) {
+ return true;
+ }
+ // Otherwise we have an infinite spill cost node.
+ initializeNode(nItr);
+ NodeData &nd = getHeuristicNodeData(nItr);
+ return nd.isAllocable;
+ }
+ // else
+ return false;
+ }
+
+ /// \brief Add a node to the heuristic reduce list.
+ /// @param nItr Node iterator to add to the heuristic reduce list.
+ void addToHeuristicReduceList(Graph::NodeItr nItr) {
+ NodeData &nd = getHeuristicNodeData(nItr);
+ initializeNode(nItr);
+ nd.isHeuristic = true;
+ if (nd.isAllocable) {
+ nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
+ } else {
+ nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr);
+ }
+ }
+
+ /// \brief Heuristically reduce one of the nodes in the heuristic
+ /// reduce list.
+ /// @return True if a reduction takes place, false if the heuristic reduce
+ /// list is empty.
+ ///
+ /// If the list of allocable nodes is non-empty a node is selected
+ /// from it and pushed to the stack. Otherwise if the non-allocable list
+ /// is non-empty a node is selected from it and pushed to the stack.
+ /// If both lists are empty the method simply returns false with no action
+ /// taken.
+ bool heuristicReduce() {
+ if (!rnAllocableList.empty()) {
+ RNAllocableListItr rnaItr =
+ std::min_element(rnAllocableList.begin(), rnAllocableList.end(),
+ LinkDegreeComparator(getSolver()));
+ Graph::NodeItr nItr = *rnaItr;
+ rnAllocableList.erase(rnaItr);
+ handleRemoveNode(nItr);
+ getSolver().pushToStack(nItr);
+ return true;
+ } else if (!rnUnallocableList.empty()) {
+ RNUnallocableListItr rnuItr =
+ std::min_element(rnUnallocableList.begin(), rnUnallocableList.end(),
+ SpillCostComparator(getSolver()));
+ Graph::NodeItr nItr = *rnuItr;
+ rnUnallocableList.erase(rnuItr);
+ handleRemoveNode(nItr);
+ getSolver().pushToStack(nItr);
+ return true;
+ }
+ // else
+ return false;
+ }
+
+ /// \brief Prepare a change in the costs on the given edge.
+ /// @param eItr Edge iterator.
+ void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+ Graph &g = getGraph();
+ Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
+ n2Itr = g.getEdgeNode2(eItr);
+ NodeData &n1 = getHeuristicNodeData(n1Itr),
+ &n2 = getHeuristicNodeData(n2Itr);
+
+ if (n1.isHeuristic)
+ subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr));
+ if (n2.isHeuristic)
+ subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr));
+
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+ ed.isUpToDate = false;
+ }
+
+ /// \brief Handle the change in the costs on the given edge.
+ /// @param eItr Edge iterator.
+ void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+ // This is effectively the same as adding a new edge now, since
+ // we've factored out the costs of the old one.
+ handleAddEdge(eItr);
+ }
+
+ /// \brief Handle the addition of a new edge into the PBQP graph.
+ /// @param eItr Edge iterator for the added edge.
+ ///
+ /// Updates allocability of any nodes connected by this edge which are
+ /// being managed by the heuristic. If allocability changes they are
+ /// moved to the appropriate list.
+ void handleAddEdge(Graph::EdgeItr eItr) {
+ Graph &g = getGraph();
+ Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
+ n2Itr = g.getEdgeNode2(eItr);
+ NodeData &n1 = getHeuristicNodeData(n1Itr),
+ &n2 = getHeuristicNodeData(n2Itr);
+
+ // If neither node is managed by the heuristic there's nothing to be
+ // done.
+ if (!n1.isHeuristic && !n2.isHeuristic)
+ return;
+
+ // Ok - we need to update at least one node.
+ computeEdgeContributions(eItr);
+
+ // Update node 1 if it's managed by the heuristic.
+ if (n1.isHeuristic) {
+ bool n1WasAllocable = n1.isAllocable;
+ addEdgeContributions(eItr, n1Itr);
+ updateAllocability(n1Itr);
+ if (n1WasAllocable && !n1.isAllocable) {
+ rnAllocableList.erase(n1.rnaItr);
+ n1.rnuItr =
+ rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
+ }
+ }
+
+ // Likewise for node 2.
+ if (n2.isHeuristic) {
+ bool n2WasAllocable = n2.isAllocable;
+ addEdgeContributions(eItr, n2Itr);
+ updateAllocability(n2Itr);
+ if (n2WasAllocable && !n2.isAllocable) {
+ rnAllocableList.erase(n2.rnaItr);
+ n2.rnuItr =
+ rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
+ }
+ }
+ }
+
+ /// \brief Handle disconnection of an edge from a node.
+ /// @param eItr Edge iterator for edge being disconnected.
+ /// @param nItr Node iterator for the node being disconnected from.
+ ///
+ /// Updates allocability of the given node and, if appropriate, moves the
+ /// node to a new list.
+ void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+ NodeData &nd = getHeuristicNodeData(nItr);
+
+ // If the node is not managed by the heuristic there's nothing to be
+ // done.
+ if (!nd.isHeuristic)
+ return;
+
+ EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr);
+
+ assert(ed.isUpToDate && "Edge data is not up to date.");
+
+ // Update node.
+ bool ndWasAllocable = nd.isAllocable;
+ subtractEdgeContributions(eItr, nItr);
+ updateAllocability(nItr);
+
+ // If the node has gone optimal...
+ if (shouldOptimallyReduce(nItr)) {
+ nd.isHeuristic = false;
+ addToOptimalReduceList(nItr);
+ if (ndWasAllocable) {
+ rnAllocableList.erase(nd.rnaItr);
+ } else {
+ rnUnallocableList.erase(nd.rnuItr);
+ }
+ } else {
+ // Node didn't go optimal, but we might have to move it
+ // from "unallocable" to "allocable".
+ if (!ndWasAllocable && nd.isAllocable) {
+ rnUnallocableList.erase(nd.rnuItr);
+ nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
+ }
+ }
+ }
+
+ private:
+
+ NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+ return getSolver().getHeuristicNodeData(nItr);
+ }
+
+ EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+ return getSolver().getHeuristicEdgeData(eItr);
+ }
+
+ // Work out what this edge will contribute to the allocability of the
+ // nodes connected to it.
+ void computeEdgeContributions(Graph::EdgeItr eItr) {
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+
+ if (ed.isUpToDate)
+ return; // Edge data is already up to date.
+
+ Matrix &eCosts = getGraph().getEdgeCosts(eItr);
+
+ unsigned numRegs = eCosts.getRows() - 1,
+ numReverseRegs = eCosts.getCols() - 1;
+
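+ // Index 0 in each dimension of the cost matrix is the spill option, so
+ // register r maps to matrix index r + 1. An infinite entry forbids a
+ // pairing: it marks both registers unsafe, and ed.worst records the
+ // largest number of node 1's registers that any single choice at node 2
+ // can deny (ed.reverseWorst is the same with the roles swapped).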
+ std::vector<unsigned> rowInfCounts(numRegs, 0),
+ colInfCounts(numReverseRegs, 0);
+
+ ed.worst = 0;
+ ed.reverseWorst = 0;
+ ed.unsafe.clear();
+ ed.unsafe.resize(numRegs, 0);
+ ed.reverseUnsafe.clear();
+ ed.reverseUnsafe.resize(numReverseRegs, 0);
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ for (unsigned j = 0; j < numReverseRegs; ++j) {
+ if (eCosts[i + 1][j + 1] ==
+ std::numeric_limits<PBQPNum>::infinity()) {
+ ed.unsafe[i] = 1;
+ ed.reverseUnsafe[j] = 1;
+ ++rowInfCounts[i];
+ ++colInfCounts[j];
+
+ if (colInfCounts[j] > ed.worst) {
+ ed.worst = colInfCounts[j];
+ }
+
+ if (rowInfCounts[i] > ed.reverseWorst) {
+ ed.reverseWorst = rowInfCounts[i];
+ }
+ }
+ }
+ }
+
+ ed.isUpToDate = true;
+ }
+
+ // Add the contributions of the given edge to the given node's
+ // numDenied and safe members. No action is taken other than to update
+ // these member values. Once updated these numbers can be used by clients
+ // to update the node's allocability.
+ void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+
+ assert(ed.isUpToDate && "Using out-of-date edge numbers.");
+
+ NodeData &nd = getHeuristicNodeData(nItr);
+ unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+
+ bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
+ EdgeData::UnsafeArray &unsafe =
+ nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
+ nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst;
+
+ for (unsigned r = 0; r < numRegs; ++r) {
+ if (unsafe[r]) {
+ if (nd.unsafeDegrees[r] == 0) {
+ --nd.numSafe;
+ }
+ ++nd.unsafeDegrees[r];
+ }
+ }
+ }
+
+ // Subtract the contributions of the given edge to the given node's
+ // numDenied and safe members. No action is taken other than to update
+ // these member values. Once updated these numbers can be used by clients
+ // to update the node's allocability.
+ void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+
+ assert(ed.isUpToDate && "Using out-of-date edge numbers.");
+
+ NodeData &nd = getHeuristicNodeData(nItr);
+ unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+
+ bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
+ EdgeData::UnsafeArray &unsafe =
+ nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
+ nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst;
+
+ for (unsigned r = 0; r < numRegs; ++r) {
+ if (unsafe[r]) {
+ if (nd.unsafeDegrees[r] == 1) {
+ ++nd.numSafe;
+ }
+ --nd.unsafeDegrees[r];
+ }
+ }
+ }
+
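+ // Briggs-style allocability test: a node is provably allocable if its
+ // neighbours cannot deny all of its registers even in the worst case
+ // (numDenied < numRegs), or if at least one of its registers never
+ // appears in a forbidden (infinite) pairing on any edge (numSafe > 0).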
+ void updateAllocability(Graph::NodeItr nItr) {
+ NodeData &nd = getHeuristicNodeData(nItr);
+ unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+ nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0;
+ }
+
+ void initializeNode(Graph::NodeItr nItr) {
+ NodeData &nd = getHeuristicNodeData(nItr);
+
+ if (nd.isInitialized)
+ return; // Node data is already up to date.
+
+ unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+
+ nd.numDenied = 0;
+ nd.numSafe = numRegs;
+ nd.unsafeDegrees.resize(numRegs, 0);
+
+ typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+
+ for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
+ aeEnd = getSolver().solverEdgesEnd(nItr);
+ aeItr != aeEnd; ++aeItr) {
+
+ Graph::EdgeItr eItr = *aeItr;
+ computeEdgeContributions(eItr);
+ addEdgeContributions(eItr, nItr);
+ }
+
+ updateAllocability(nItr);
+ nd.isInitialized = true;
+ }
+
+ void handleRemoveNode(Graph::NodeItr xnItr) {
+ typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+ std::vector<Graph::EdgeItr> edgesToRemove;
+ for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
+ aeEnd = getSolver().solverEdgesEnd(xnItr);
+ aeItr != aeEnd; ++aeItr) {
+ Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
+ handleRemoveEdge(*aeItr, ynItr);
+ edgesToRemove.push_back(*aeItr);
+ }
+ while (!edgesToRemove.empty()) {
+ getSolver().removeSolverEdge(edgesToRemove.back());
+ edgesToRemove.pop_back();
+ }
+ }
+
+ RNAllocableList rnAllocableList;
+ RNUnallocableList rnUnallocableList;
+ };
+
+ }
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
diff --git a/lib/CodeGen/PBQP/Math.h b/lib/CodeGen/PBQP/Math.h
new file mode 100644
index 0000000..e7598bf
--- /dev/null
+++ b/lib/CodeGen/PBQP/Math.h
@@ -0,0 +1,288 @@
+//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_MATH_H
+#define LLVM_CODEGEN_PBQP_MATH_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace PBQP {
+
+typedef float PBQPNum;
+
+/// \brief PBQP Vector class.
+class Vector {
+ public:
+
+ /// \brief Construct a PBQP vector of the given size.
+ explicit Vector(unsigned length) :
+ length(length), data(new PBQPNum[length]) {
+ }
+
+ /// \brief Construct a PBQP vector with initializer.
+ Vector(unsigned length, PBQPNum initVal) :
+ length(length), data(new PBQPNum[length]) {
+ std::fill(data, data + length, initVal);
+ }
+
+ /// \brief Copy construct a PBQP vector.
+ Vector(const Vector &v) :
+ length(v.length), data(new PBQPNum[length]) {
+ std::copy(v.data, v.data + length, data);
+ }
+
+ /// \brief Destroy this vector, return its memory.
+ ~Vector() { delete[] data; }
+
+ /// \brief Assignment operator.
+ Vector& operator=(const Vector &v) {
+ delete[] data;
+ length = v.length;
+ data = new PBQPNum[length];
+ std::copy(v.data, v.data + length, data);
+ return *this;
+ }
+
+ /// \brief Return the length of the vector
+ unsigned getLength() const {
+ return length;
+ }
+
+ /// \brief Element access.
+ PBQPNum& operator[](unsigned index) {
+ assert(index < length && "Vector element access out of bounds.");
+ return data[index];
+ }
+
+ /// \brief Const element access.
+ const PBQPNum& operator[](unsigned index) const {
+ assert(index < length && "Vector element access out of bounds.");
+ return data[index];
+ }
+
+ /// \brief Add another vector to this one.
+ Vector& operator+=(const Vector &v) {
+ assert(length == v.length && "Vector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Subtract another vector from this one.
+ Vector& operator-=(const Vector &v) {
+ assert(length == v.length && "Vector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Returns the index of the minimum value in this vector
+ unsigned minIndex() const {
+ return std::min_element(data, data + length) - data;
+ }
+
+ private:
+ unsigned length;
+ PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given vector on the given
+/// output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Vector &v) {
+ assert((v.getLength() != 0) && "Zero-length vector badness.");
+
+ os << "[ " << v[0];
+ for (unsigned i = 1; i < v.getLength(); ++i) {
+ os << ", " << v[i];
+ }
+ os << " ]";
+
+ return os;
+}
+
+
+/// \brief PBQP Matrix class
+class Matrix {
+ public:
+
+ /// \brief Construct a PBQP Matrix with the given dimensions.
+ Matrix(unsigned rows, unsigned cols) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ }
+
+ /// \brief Construct a PBQP Matrix with the given dimensions and initial
+ /// value.
+ Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ std::fill(data, data + (rows * cols), initVal);
+ }
+
+ /// \brief Copy construct a PBQP matrix.
+ Matrix(const Matrix &m) :
+ rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+ std::copy(m.data, m.data + (rows * cols), data);
+ }
+
+ /// \brief Destroy this matrix, return its memory.
+ ~Matrix() { delete[] data; }
+
+ /// \brief Assignment operator.
+ Matrix& operator=(const Matrix &m) {
+ delete[] data;
+ rows = m.rows; cols = m.cols;
+ data = new PBQPNum[rows * cols];
+ std::copy(m.data, m.data + (rows * cols), data);
+ return *this;
+ }
+
+ /// \brief Return the number of rows in this matrix.
+ unsigned getRows() const { return rows; }
+
+ /// \brief Return the number of cols in this matrix.
+ unsigned getCols() const { return cols; }
+
+ /// \brief Matrix element access.
+ PBQPNum* operator[](unsigned r) {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ /// \brief Matrix element access.
+ const PBQPNum* operator[](unsigned r) const {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ /// \brief Returns the given row as a vector.
+ Vector getRowAsVector(unsigned r) const {
+ Vector v(cols);
+ for (unsigned c = 0; c < cols; ++c)
+ v[c] = (*this)[r][c];
+ return v;
+ }
+
+ /// \brief Returns the given column as a vector.
+ Vector getColAsVector(unsigned c) const {
+ Vector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][c];
+ return v;
+ }
+
+ /// \brief Reset the matrix to the given value.
+ Matrix& reset(PBQPNum val = 0) {
+ std::fill(data, data + (rows * cols), val);
+ return *this;
+ }
+
+ /// \brief Set a single row of this matrix to the given value.
+ Matrix& setRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds.");
+ std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+ return *this;
+ }
+
+ /// \brief Set a single column of this matrix to the given value.
+ Matrix& setCol(unsigned c, PBQPNum val) {
+ assert(c < cols && "Column out of bounds.");
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] = val;
+ return *this;
+ }
+
+ /// \brief Matrix transpose.
+ Matrix transpose() const {
+ Matrix m(cols, rows);
+ for (unsigned r = 0; r < rows; ++r)
+ for (unsigned c = 0; c < cols; ++c)
+ m[c][r] = (*this)[r][c];
+ return m;
+ }
+
+ /// \brief Returns the diagonal of the matrix as a vector.
+ ///
+ /// Matrix must be square.
+ Vector diagonalize() const {
+ assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+ Vector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][r];
+ return v;
+ }
+
+ /// \brief Add the given matrix to this one.
+ Matrix& operator+=(const Matrix &m) {
+ assert(rows == m.rows && cols == m.cols &&
+ "Matrix dimensions mismatch.");
+ std::transform(data, data + (rows * cols), m.data, data,
+ std::plus<PBQPNum>());
+ return *this;
+ }
+
+ /// \brief Returns the minimum of the given row
+ PBQPNum getRowMin(unsigned r) const {
+ assert(r < rows && "Row out of bounds");
+ return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+ }
+
+ /// \brief Returns the minimum of the given column
+ PBQPNum getColMin(unsigned c) const {
+ PBQPNum minElem = (*this)[0][c];
+ for (unsigned r = 1; r < rows; ++r)
+ if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+ return minElem;
+ }
+
+ /// \brief Subtracts the given scalar from the elements of the given row.
+ Matrix& subFromRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds");
+ std::transform(data + (r * cols), data + ((r + 1) * cols),
+ data + (r * cols),
+ std::bind2nd(std::minus<PBQPNum>(), val));
+ return *this;
+ }
+
+ /// \brief Subtracts the given scalar from the elements of the given column.
+ Matrix& subFromCol(unsigned c, PBQPNum val) {
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] -= val;
+ return *this;
+ }
+
+ /// \brief Returns true if this is a zero matrix.
+ bool isZero() const {
+ return std::find_if(data, data + (rows * cols),
+ std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+ data + (rows * cols);
+ }
+
+ private:
+ unsigned rows, cols;
+ PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given matrix on the given
+/// output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Matrix &m) {
+
+ assert((m.getRows() != 0) && "Zero-row matrix badness.");
+
+ for (unsigned i = 0; i < m.getRows(); ++i) {
+ os << m.getRowAsVector(i);
+ }
+
+ return os;
+}
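+
+// An illustrative sketch of typical cost setup (hypothetical values): a node
+// with a spill option in element 0 plus two registers, and an interference
+// matrix forbidding one register pairing (requires <limits> for infinity()):
+//
+// Vector costs(3, 0);
+// costs[0] = 5; // spill cost
+// Matrix eCosts(3, 3, 0);
+// eCosts[1][1] = std::numeric_limits<PBQPNum>::infinity();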
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
new file mode 100644
index 0000000..294b537
--- /dev/null
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -0,0 +1,58 @@
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Solution class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
+#define LLVM_CODEGEN_PBQP_SOLUTION_H
+
+#include "Math.h"
+#include "Graph.h"
+
+#include <map>
+
+namespace PBQP {
+
+ /// \brief Represents a solution to a PBQP problem.
+ ///
+ /// To get the selection for each node in the problem, use the getSelection
+ /// method.
+ class Solution {
+ private:
+ typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
+ SelectionsMap selections;
+
+ public:
+
+ /// \brief Number of nodes for which selections have been made.
+ /// @return Number of nodes for which selections have been made.
+ unsigned numNodes() const { return selections.size(); }
+
+ /// \brief Set the selection for a given node.
+ /// @param nItr Node iterator.
+ /// @param selection Selection for nItr.
+ void setSelection(Graph::NodeItr nItr, unsigned selection) {
+ selections[nItr] = selection;
+ }
+
+ /// \brief Get a node's selection.
+ /// @param nItr Node iterator.
+ /// @return The selection for nItr.
+ unsigned getSelection(Graph::NodeItr nItr) const {
+ SelectionsMap::const_iterator sItr = selections.find(nItr);
+ assert(sItr != selections.end() && "No selection for node.");
+ return sItr->second;
+ }
+
+ };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..b740c68
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,484 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "PHIElimination.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumSplits, "Number of critical edges split on demand");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+static RegisterPass<PHIElimination>
+X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+
+const PassInfo *const llvm::PHIEliminationID = &X;
+
+void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<MachineDominatorTree>();
+ // rdar://7401784 This would be nice:
+ // AU.addPreservedID(MachineLoopInfoID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+
+ PHIDefs.clear();
+ PHIKills.clear();
+ bool Changed = false;
+
+ // Split critical edges to help the coalescer
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(Fn, *I, *LV);
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(Fn);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(Fn, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_empty(DefReg))
+ DefMI->eraseFromParent();
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+ I != E; ++I)
+ Fn.DeleteMachineInstr(I->first);
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ unsigned SrcReg = MPhi->getOperand(i).getReg();
+ const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (!DefMI || !DefMI->isImplicitDef())
+ return false;
+ }
+ return true;
+}
+
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB.empty())
+ return MBB.begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB.isLandingPad())
+ return MBB.getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *DefUseMI = &*RI;
+ if (DefUseMI->getParent() == &MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB.begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB.end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes, however.
+ return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
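+/// As an illustrative sketch (hypothetical registers and blocks), a PHI
+///
+///   %dst = PHI [ %a, BB#1 ], [ %b, BB#2 ]
+///
+/// is lowered to a copy "%dst = %incoming" at the top of this block (after
+/// any remaining PHIs), plus a copy "%incoming = %a" at the end of BB#1 and
+/// "%incoming = %b" at the end of BB#2, where %incoming is a freshly created
+/// virtual register.
+///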
+void llvm::PHIElimination::LowerAtomicPHINode(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumAtomic;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+ } else {
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+ }
+
+ // Record PHI def.
+ assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+ PHIDefs[DestReg] = &MBB;
+
+ // Update live variable information if there is any.
+ LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ VI.NumUses++;
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // for each incoming block), the "def" block and instruction fields
+ // for the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+ // source of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Record the kill.
+ PHIKills[SrcReg].insert(&opBlock);
+
+ // If source is defined by an implicit def, there is no need to insert a
+ // copy.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ ImpDefs.insert(DefMI);
+ continue;
+ }
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy; this may be the first
+ // terminator in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ FindCopyInsertPoint(opBlock, MBB, SrcReg);
+
+ // Insert the copy.
+ if (!reusedIncoming && IncomingReg)
+ TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+
+ // Now update live variable information if we have it. Otherwise we're done.
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI (i.e. the
+ // copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value is
+ // live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Also check to see if this register is in use by another PHI node which
+ // has not yet been eliminated. If so, it will be killed at an appropriate
+ // point later.
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+
+ // Okay, if we now know that the value is not live out of the block, we can
+ // add a kill marker in this block saying that it kills the incoming value!
+ if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy; however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as the killing instruction, not the
+ // copy.
+ MachineBasicBlock::iterator KillInst;
+ MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+ if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+ KillInst = Term;
+
+ // Check that no later terminators read the value.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = llvm::next(Term);
+ TI != opBlock.end(); ++TI) {
+ assert(!TI->readsRegister(SrcReg) &&
+ "Terminator instructions cannot use virtual registers unless"
+ "they are the first terminator in a block!");
+ }
+#endif
+ } else if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = Term;
+ while (KillInst != opBlock.begin())
+ if ((--KillInst)->readsRegister(SrcReg))
+ break;
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg)
+ MF.DeleteMachineInstr(MPhi);
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, for each virtual register used by a PHI node we count how many
+/// times it is used, keyed by the BB the vreg is coming from. This is used
+/// later to determine when the vreg is killed in the BB.
+///
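+/// For example (hypothetical), if %v feeds two PHI nodes from BB#3, then
+/// VRegPHIUseCount[(3, %v)] is 2. The count is decremented as each PHI is
+/// lowered, and %v may only be marked killed in BB#3 once it reaches zero.
+///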
+void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+ BBI->getOperand(i).getReg())];
+}
+
+bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live into this block
+ // anyway, we would gain nothing from splitting.
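+ // Roughly: when the register is live out of PreMBB for other reasons,
+ // the copy inserted for this PHI cannot be coalesced away there;
+ // giving the copy its own block on the split edge keeps the live
+ // ranges apart.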
+ if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
+ SplitCriticalEdge(PreMBB, &MBB);
+ }
+ }
+ return true;
+}
+
+MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
+ assert(A && B && "Missing MBB end point");
+
+ MachineFunction *MF = A->getParent();
+
+ // We may need to update A's terminator, but we can't do that if AnalyzeBranch
+ // fails. If A uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
+ return NULL;
+
+ ++NumSplits;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
+ DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ " BB#" << A->getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << B->getNumber() << '\n');
+
+ A->ReplaceUsesOfBlockWith(B, NMBB);
+ A->updateTerminator();
+
+ // Insert unconditional "jump B" instruction in NMBB if necessary.
+ NMBB->addSuccessor(B);
+ if (!NMBB->isLayoutSuccessor(B)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+ }
+
+ // Fix PHI nodes in B so they refer to NMBB instead of A
+ for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == A)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, A, B);
+
+ if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, A);
+
+ return NMBB;
+}
+
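+/// getHashValue - Hash a PHI node by its (predecessor MBB, source vreg)
+/// pairs, ignoring operand 0 (the destination), so that PHIs with identical
+/// sources hash alike and can be matched for reuse in LoweredPHIs.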
+unsigned
+PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) {
+ if (!MI || MI==getEmptyKey() || MI==getTombstoneKey())
+ return DenseMapInfo<MachineInstr*>::getHashValue(MI);
+ unsigned hash = 0;
+ for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2)
+ hash = hash*37 + DenseMapInfo<BBVRegPair>::
+ getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(),
+ MI->getOperand(ni).getReg()));
+ return hash;
+}
+
+bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS,
+ const MachineInstr *RHS) {
+ const MachineInstr *EmptyKey = getEmptyKey();
+ const MachineInstr *TombstoneKey = getTombstoneKey();
+ if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey ||
+ LHS==TombstoneKey || RHS==TombstoneKey)
+ return LHS==RHS;
+
+ unsigned ne = LHS->getNumOperands();
+ if (ne != RHS->getNumOperands())
+ return false;
+ // Ignore operand 0, the defined register.
+ for (unsigned ni = 1; ni != ne; ni += 2)
+ if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() ||
+ LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB())
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
new file mode 100644
index 0000000..895aaa4
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.h
@@ -0,0 +1,145 @@
+//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
+#define LLVM_CODEGEN_PHIELIMINATION_HPP
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+ class LiveVariables;
+
+ /// Lower PHI instructions to copies.
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+ private:
+
+ typedef SmallSet<MachineBasicBlock*, 4> PHIKillList;
+ typedef DenseMap<unsigned, PHIKillList> PHIKillMap;
+ typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap;
+
+ public:
+
+ typedef PHIKillList::iterator phi_kill_iterator;
+ typedef PHIKillList::const_iterator const_phi_kill_iterator;
+
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// Return true if the given vreg was defined by a PHI instr prior to
+ /// lowering.
+ bool hasPHIDef(unsigned vreg) const {
+ return PHIDefs.count(vreg);
+ }
+
+ /// Returns the block where the PHI instruction that defined the
+ /// given vreg used to reside.
+ MachineBasicBlock* getPHIDefBlock(unsigned vreg) {
+ PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg);
+ assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def.");
+ return phiDefItr->second;
+ }
+
+ /// Returns true if the given vreg was killed by a PHI instr.
+ bool hasPHIKills(unsigned vreg) const {
+ return PHIKills.count(vreg);
+ }
+
+ /// Returns an iterator over the BasicBlocks which contained PHI
+ /// kills of this register prior to lowering.
+ phi_kill_iterator phiKillsBegin(unsigned vreg) {
+ PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+ assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+ return phiKillItr->second.begin();
+ }
+ phi_kill_iterator phiKillsEnd(unsigned vreg) {
+ PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+ assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+ return phiKillItr->second.end();
+ }
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, for each virtual register used by a PHI node we
+ /// count how many times it is used, keyed by the BB the vreg is coming
+ /// from. This is used later to determine when the vreg is killed in
+ /// the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// SplitCriticalEdge - Split a critical edge from A to B by
+ /// inserting a new MBB. Update branches in A and PHI instructions
+ /// in B. Return the new block.
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B);
+
+ /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
+ unsigned SrcReg);
+
+ // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+ // also after any exception handling labels: in landing pads execution
+ // starts at the label, so any copies placed before it won't be executed!
+ MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // Rather than assuming that EH labels come before other kinds of labels,
+ // just skip all labels.
+ while (I != MBB.end() && (I->isPHI() || I->isLabel()))
+ ++I;
+ return I;
+ }
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+ PHIDefMap PHIDefs;
+ PHIKillMap PHIKills;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Lowered PHI nodes may be reused. We provide special DenseMap traits to
+ // match PHI nodes with identical arguments.
+ struct PHINodeTraits : public DenseMapInfo<MachineInstr*> {
+ static unsigned getHashValue(const MachineInstr *PtrVal);
+ static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS);
+ };
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+
+}
+
+#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..f67eb79
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,54 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&createLinearScanRegisterAllocator),
+ cl::desc("Register allocator to use: (default = linearscan)"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ return Ctor();
+}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..f43395f
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,726 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AntiDepBreaker.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "ExactHazardRecognizer.h"
+#include "SimpleHazardRecognizer.h"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
+static cl::opt<bool>
+EnablePostRAHazardAvoidance("avoid-hazards",
+ cl::desc("Enable exact hazard avoidance"),
+ cl::init(true), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ public:
+ static char ID;
+ PostRAScheduler(CodeGenOpt::Level ol) :
+ MachineFunctionPass(&ID), OptLevel(ol) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// Topo - A topological ordering for SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ public:
+ SchedulePostRATDList(MachineFunction &MF,
+ const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ ScheduleHazardRecognizer *HR,
+ AntiDepBreaker *ADB,
+ AliasAnalysis *aa)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
+ HazardRec(HR), AntiDepBreak(ADB), AA(aa) {}
+
+ ~SchedulePostRATDList() {
+ }
+
+ /// StartBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void Schedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// FinishBlock - Clean up register live-range state.
+ ///
+ void FinishBlock();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
+ private:
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+ };
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+///
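+/// For instance (illustrative), on X86 a push implicitly modifies the stack
+/// pointer, so it is treated as a boundary here rather than forcing every
+/// stack slot reference to carry a dependence on it.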
+static bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineFunction &MF) {
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that modifies
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ AA = &getAnalysis<AliasAnalysis>();
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return false;
+ } else {
+ // Check that post-RA scheduling is enabled for this target.
+ const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ return false;
+ }
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL :
+ (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL :
+ TargetSubtarget::ANTIDEP_NONE;
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
+ ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
+ (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
+ (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
+ AntiDepBreaker *ADB =
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+ ":BB#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.StartBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = prior(I);
+ if (isSchedulingBoundary(MI, Fn)) {
+ Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.EmitSchedule(0);
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.EmitSchedule(0);
+
+ // Clean up register live-range state.
+ Scheduler.FinishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
+ }
+
+ delete HR;
+ delete ADB;
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::StartBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ if (AntiDepBreak != NULL) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+ InsertPosIndex);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::FinishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Initialize the indices to indicate that no registers are live.
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+ KillIndices[i] = ~0u;
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ } else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (KillIndices[MO.getReg()] != ~0u) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ false /*IsDead*/));
+ AllDead = false;
+ }
+ }
+
+ if (AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix the register kill flags; they may have been made
+/// incorrect by instruction reordering.
+///
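+/// Illustrative scenario (hypothetical): if a register was marked killed at
+/// what used to be its last use, and scheduling moved another use of it below
+/// that point, the old kill flag is now wrong; the bottom-up scan below
+/// recomputes which use really is the last one.
+///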
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ std::set<unsigned> killedRegs;
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ // Update liveness. Registers that are def'd but not used in this
+ // instruction are now dead (scanning bottom-up). Clear the live state of
+ // the register and all of its subregs, as they are completely redefined.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ KillIndices[Reg] = ~0u;
+
+ // Repeat for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = ~0u;
+ }
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ bool kill = false;
+ if (killedRegs.find(Reg) == killedRegs.end()) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If no subreg is live, then the register is killed here if it first
+ // becomes live at this instruction (scanning bottom-up).
+ if (kill)
+ kill = (KillIndices[Reg] == ~0u);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.insert(Reg);
+ }
+
+ // Mark any used register (except undef uses) and its subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ KillIndices[Reg] = Count;
+
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = Count;
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Compute how many cycles it will be before this actually becomes
+ // available. This is the max of the start time of all predecessors plus
+ // their latencies.
+ SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
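+/// When nothing can be issued in a cycle, the cycle is advanced: silently if
+/// instructions were already emitted this cycle, as a counted stall for an
+/// ordinary pipeline hazard, or by emitting a noop when the target would
+/// otherwise execute incorrectly (see the bottom of the main loop).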
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ bool available = SUnits[i].Preds.empty();
+ if (available) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n";
+ LatencyPriorityQueue q = AvailableQueue;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(this);
+ });
+
+ SUnit *FoundSUnit = 0;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+
+ // If we are using the target-specific hazards, then don't
+ // advance the cycle time just because we schedule a node. If
+ // the target allows it we can schedule multiple nodes in the
+ // same cycle.
+ if (!EnablePostRAHazardAvoidance) {
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks, among others.
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
+ return new PostRAScheduler(OptLevel);
+}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 0000000..70e91aa
--- /dev/null
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1465 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and splits all live intervals which cross the barrier.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+ cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+ cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+ class PreAllocSplitting : public MachineFunctionPass {
+ MachineFunction *CurrMF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo* TRI;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *SIs;
+ LiveIntervals *LIs;
+ LiveStacks *LSs;
+ VirtRegMap *VRM;
+
+ // Barrier - Current barrier being processed.
+ MachineInstr *Barrier;
+
+ // BarrierMBB - Basic block where the barrier resides.
+ MachineBasicBlock *BarrierMBB;
+
+ // BarrierIdx - Current barrier index.
+ SlotIndex BarrierIdx;
+
+ // CurrLI - Current live interval being split.
+ LiveInterval *CurrLI;
+
+ // CurrSLI - Current stack slot live interval.
+ LiveInterval *CurrSLI;
+
+ // CurrSValNo - Current val# for the stack slot live interval.
+ VNInfo *CurrSValNo;
+
+ // IntervalSSMap - A map from live interval to spill slots.
+ DenseMap<unsigned, int> IntervalSSMap;
+
+ // Def2SpillMap - A map from a def instruction index to spill index.
+ DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
+
+ public:
+ static char ID;
+ PreAllocSplitting()
+ : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addPreserved<RegisterCoalescer>();
+ AU.addPreserved<CalculateSpillWeights>();
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ IntervalSSMap.clear();
+ Def2SpillMap.clear();
+ }
+
+ virtual const char *getPassName() const {
+ return "Pre-Register Allocaton Live Interval Splitting";
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* M = 0) const {
+ LIs->print(O, M);
+ }
+
+
+ private:
+
+ MachineBasicBlock::iterator
+ findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+ SmallPtrSet<MachineInstr*, 4>&);
+
+ MachineBasicBlock::iterator
+ findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
+ SmallPtrSet<MachineInstr*, 4>&);
+
+ int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+ bool IsAvailableInStack(MachineBasicBlock*, unsigned,
+ SlotIndex, SlotIndex,
+ SlotIndex&, int&) const;
+
+ void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
+
+ bool SplitRegLiveInterval(LiveInterval*);
+
+ bool SplitRegLiveIntervals(const TargetRegisterClass **,
+ SmallPtrSet<LiveInterval*, 8>&);
+
+ bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB);
+ bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ void RenumberValno(VNInfo* VN);
+ void ReconstructLiveInterval(LiveInterval* LI);
+ bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+ unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex, bool& TwoAddr);
+ VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+ VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+};
+} // end anonymous namespace
+
+char PreAllocSplitting::ID = 0;
+
+static RegisterPass<PreAllocSplitting>
+X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+
+const PassInfo *const llvm::PreAllocSplittingID = &X;
+
+/// findSpillPoint - Find a gap, as far away from the given MI as possible,
+/// that is suitable for spilling the current live interval. The index must
+/// be before any
+/// defs and uses of the live interval register in the mbb. Return begin() if
+/// none is found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ MachineInstr *DefMI,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
+ MachineBasicBlock::iterator Pt = MBB->begin();
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock::iterator EndPt = DefMI
+ ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameSetupOpcode())
+ --MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII)) {
+ // We can't insert the spill between the barrier (a call) and its
+ // corresponding call frame setup.
+ if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
+ while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
+ --MII;
+ if (MII == EndPt) {
+ return Pt;
+ }
+ }
+ continue;
+ } else {
+ Pt = MII;
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+ --MII;
+ }
+
+ return Pt;
+}
+
+/// findRestorePoint - Find a gap in the instruction index map that's suitable
+/// for restoring the current live interval value. The index must be before any
+/// uses of the live interval register in the mbb. Return end() if none is
+/// found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ SlotIndex LastIdx,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
+ // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
+ // begin index accordingly.
+ MachineBasicBlock::iterator Pt = MBB->end();
+ MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
+
+ // We start at the call, so walk forward until we find the call frame teardown
+ // since we can't insert restores before that. Bail if we encounter a use
+ // during this time.
+ MachineBasicBlock::iterator MII = MI;
+ if (MII == EndPt) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
+ ++MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+
+ // FIXME: Limit the number of instructions to examine to reduce
+ // compile time?
+ while (MII != EndPt) {
+ SlotIndex Index = LIs->getInstructionIndex(MII);
+ if (Index > LastIdx)
+ break;
+
+ // We can't insert a restore between the barrier (a call) and its
+ // corresponding call frame teardown.
+ if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+ do {
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+ } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
+ } else {
+ Pt = MII;
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+ ++MII;
+ }
+
+ return Pt;
+}
+
+/// CreateSpillStackSlot - Create a stack slot for the live interval being
+/// split. If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ int SS;
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
+ IntervalSSMap[Reg] = SS;
+ }
+
+ // Create live interval for stack slot.
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ LSs->getVNInfoAllocator());
+ return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+ unsigned Reg, SlotIndex DefIndex,
+ SlotIndex RestoreIndex,
+ SlotIndex &SpillIndex,
+ int& SS) const {
+ if (!DefMBB)
+ return false;
+
+ DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
+ if (I == IntervalSSMap.end())
+ return false;
+ DenseMap<SlotIndex, SlotIndex>::const_iterator
+ II = Def2SpillMap.find(DefIndex);
+ if (II == Def2SpillMap.end())
+ return false;
+
+ // If last spill of def is in the same mbb as barrier mbb (where restore will
+ // be), make sure it's not below the intended restore index.
+ // FIXME: Undo the previous spill?
+ assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+ if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+ return false;
+
+ SS = I->second;
+ SpillIndex = II->second;
+ return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
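+/// Sketch of the propagation below: in the spilling block the slot is live
+/// from the spill to the block end; successor blocks where the split val#
+/// is live through are then visited via a worklist, and the range ends in
+/// the block containing the restore.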
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
+ SlotIndex RestoreIndex) {
+ assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+ "Expect restore in the barrier mbb");
+
+ MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+ if (MBB == BarrierMBB) {
+ // Intra-block spill + restore. We are done.
+ LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ return;
+ }
+
+ SmallPtrSet<MachineBasicBlock*, 4> Processed;
+ SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
+ LiveRange SLR(SpillIndex, EndIdx, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ Processed.insert(MBB);
+
+ // Start from the spill mbb, figure out the extent of the spill slot's
+ // live interval.
+ SmallVector<MachineBasicBlock*, 4> WorkList;
+ const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+ if (LR->end > EndIdx)
+ // If the live range extends beyond the end of the mbb, add successors
+ // to the work list.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB = WorkList.back();
+ WorkList.pop_back();
+ if (Processed.count(MBB))
+ continue;
+ SlotIndex Idx = LIs->getMBBStartIdx(MBB);
+ LR = CurrLI->getLiveRangeContaining(Idx);
+ if (LR && LR->valno == ValNo) {
+ EndIdx = LIs->getMBBEndIdx(MBB);
+ if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+ // Spill slot live interval stops at the restore.
+ LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ } else if (LR->end > EndIdx) {
+ // Live range extends beyond end of mbb, process successors.
+ LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
+ CurrSLI->addRange(SLR);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+ } else {
+ LiveRange SLR(Idx, LR->end, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ }
+ Processed.insert(MBB);
+ }
+ }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // Return memoized result if it's available.
+ if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsIntraBlock && LiveOut.count(MBB))
+ return LiveOut[MBB];
+
+ // Check if our block contains any uses or defs.
+ bool ContainsDefs = Defs.count(MBB);
+ bool ContainsUses = Uses.count(MBB);
+
+ VNInfo* RetVNI = 0;
+
+ // Enumerate the cases of use/def containing blocks.
+ if (!ContainsDefs && !ContainsUses) {
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ } else if (ContainsDefs && !ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+ // Search for the def in this block. If we don't find it before the
+ // instruction we care about, go to the fallback case. Note that this
+ // should never happen: this case cannot be intrablock, so the use
+ // should always be an end() iterator.
+ assert(UseI == MBB->end() && "No use marked in intrablock");
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ while (Walker != MBB->begin()) {
+ if (BlockDefs.count(Walker))
+ break;
+ --Walker;
+ }
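+ // If the walk reached the start of the block without a match, the first
+ // instruction must itself be the def, since this block is known to
+ // contain one.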
+
+ // Once we've found it, extend its VNInfo to our instruction.
+ SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
+ DefIndex = DefIndex.getDefIndex();
+ SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
+
+ RetVNI = NewVNs[Walker];
+ LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI));
+ } else if (!ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+ // Search for the use in this block that precedes the instruction we care
+ // about, going to the fallback case if we don't find it.
+ MachineBasicBlock::iterator Walker = UseI;
+ bool found = false;
+ while (Walker != MBB->begin()) {
+ --Walker;
+ if (BlockUses.count(Walker)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
+ UseIndex = UseIndex.getUseIndex();
+ SlotIndex EndIndex;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ // Now, recursively phi construct the VNInfo for the use we found,
+ // and then extend it to include the instruction we care about.
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
+
+ // FIXME: Need to set kills properly for inter-block stuff.
+ if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
+ if (IsIntraBlock)
+ RetVNI->addKill(EndIndex);
+ } else if (ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+ // This case is basically a merging of the two preceding cases, with the
+ // special note that checking for defs must take precedence over checking
+ // for uses, because of two-address instructions.
+ MachineBasicBlock::iterator Walker = UseI;
+ bool foundDef = false;
+ bool foundUse = false;
+ while (Walker != MBB->begin()) {
+ --Walker;
+ if (BlockDefs.count(Walker)) {
+ foundDef = true;
+ break;
+ } else if (BlockUses.count(Walker)) {
+ foundUse = true;
+ break;
+ }
+ }
+
+ if (!foundDef && !foundUse)
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
+ StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
+ SlotIndex EndIndex;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ if (foundDef)
+ RetVNI = NewVNs[Walker];
+ else
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+
+ if (foundUse && RetVNI->isKill(StartIndex))
+ RetVNI->removeKill(StartIndex);
+ if (IsIntraBlock) {
+ RetVNI->addKill(EndIndex);
+ }
+ }
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // NOTE: Because this is the fallback case from other cases, we do NOT
+ // assume that we are not intrablock here.
+ if (Phis.count(MBB)) return Phis[MBB];
+
+ SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
+ VNInfo *RetVNI = Phis[MBB] =
+ LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
+ LIs->getVNInfoAllocator());
+
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+ // If there are no uses or defs between our starting point and the
+ // beginning of the block, then recursively perform phi construction
+ // on our predecessors.
+ DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+ Visited, Defs, Uses, NewVNs,
+ LiveOut, Phis, false, false);
+ if (Incoming != 0)
+ IncomingVNs[*PI] = Incoming;
+ }
+
+ if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
+ VNInfo* OldVN = RetVNI;
+ VNInfo* NewVN = IncomingVNs.begin()->second;
+ VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+ if (MergedVN == OldVN) std::swap(OldVN, NewVN);
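+ // MergeValueNumberInto may keep either value number; normalize so that
+ // OldVN refers to the one that was eliminated.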
+
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+ LOE = LiveOut.end(); LOI != LOE; ++LOI)
+ if (LOI->second == OldVN)
+ LOI->second = MergedVN;
+ for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+ NVE = NewVNs.end(); NVI != NVE; ++NVI)
+ if (NVI->second == OldVN)
+ NVI->second = MergedVN;
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+ PE = Phis.end(); PI != PE; ++PI)
+ if (PI->second == OldVN)
+ PI->second = MergedVN;
+ RetVNI = MergedVN;
+ } else {
+ // Otherwise, merge the incoming VNInfos with a phi join. Create a new
+ // VNInfo to represent the joined value.
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
+ IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
+ I->second->setHasPHIKill(true);
+ SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true);
+ if (!I->second->isKill(KillIndex))
+ I->second->addKill(KillIndex);
+ }
+ }
+
+ SlotIndex EndIndex;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+ LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+ if (IsIntraBlock)
+ RetVNI->addKill(EndIndex);
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock)
+ LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// ReconstructLiveInterval - Recompute a live interval from scratch.
+void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
+ BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator();
+
+ // Clear the old ranges and valnos.
+ LI->clear();
+
+ // Cache the uses and defs of the register
+ typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
+ RegMap Defs, Uses;
+
+ // Keep track of the new VNs we're creating.
+ DenseMap<MachineInstr*, VNInfo*> NewVNs;
+ SmallPtrSet<VNInfo*, 2> PhiVNs;
+
+ // Cache defs, and create a new VNInfo for each def.
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ Defs[(*DI).getParent()].insert(&*DI);
+
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = DefIdx.getDefIndex();
+
+ assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
+
+ // If the def is a move, set the copy field.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (DstReg == LI->reg)
+ NewVN->setCopy(&*DI);
+
+ NewVNs[&*DI] = NewVN;
+ }
+
+ // Cache uses as a separate pass from actually processing them.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI)
+ Uses[(*UI).getParent()].insert(&*UI);
+
+ // Now, actually process every use and use a phi construction algorithm
+ // to walk from it to its reaching definitions, building VNInfos along
+ // the way.
+ DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
+ DenseMap<MachineBasicBlock*, VNInfo*> Phis;
+ SmallPtrSet<MachineInstr*, 4> Visited;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis, true, true);
+ }
+
+ // Add ranges for dead defs
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = DefIdx.getDefIndex();
+
+ if (LI->liveAt(DefIdx)) continue;
+
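+ // This def reaches no use; give it a minimal range covering just its
+ // def slot so the value still appears in the interval.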
+ VNInfo* DeadVN = NewVNs[&*DI];
+ LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
+ DeadVN->addKill(DefIdx);
+ }
+
+ // Update kill markers.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo* VNI = *VI;
+ for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) {
+ SlotIndex KillIdx = VNI->kills[i];
+ if (KillIdx.isPHI())
+ continue;
+ MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx);
+ if (KillMI) {
+ MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg);
+ if (KillMO)
+ // It could be a dead def.
+ KillMO->setIsKill();
+ }
+ }
+ }
+}
+
+/// RenumberValno - Split the given valno out into a new vreg, allowing it to
+/// be allocated to a different register. This function creates a new vreg,
+/// copies the valno and its live ranges over to the new vreg's interval,
+/// removes them from the old interval, and rewrites all uses and defs of
+/// the original reg to the new vreg within those ranges.
+void PreAllocSplitting::RenumberValno(VNInfo* VN) {
+ SmallVector<VNInfo*, 4> Stack;
+ SmallVector<VNInfo*, 4> VNsToCopy;
+ Stack.push_back(VN);
+
+ // Walk through and copy the valno we care about, and any other valnos
+ // that are two-address redefinitions of the one we care about. These
+ // will need to be rewritten as well. We also check for safety of the
+ // renumbering here, by making sure that none of the valnos involved
+ // have phi kills.
+ while (!Stack.empty()) {
+ VNInfo* OldVN = Stack.back();
+ Stack.pop_back();
+
+ // Bail out if we ever encounter a valno that has a PHI kill. We can't
+ // renumber these.
+ if (OldVN->hasPHIKill()) return;
+
+ VNsToCopy.push_back(OldVN);
+
+ // Locate two-address redefinitions
+ for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(),
+ KE = OldVN->kills.end(); KI != KE; ++KI) {
+ assert(!KI->isPHI() &&
+ "VN previously reported having no PHI kills.");
+ MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
+ unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
+ if (DefIdx == ~0U) continue;
+ if (MI->isRegTiedToUseOperand(DefIdx)) {
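+ // The kill is also a two-address redefinition of this value; the
+ // chained value number must be renumbered along with it.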
+ VNInfo* NextVN =
+ CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex());
+ if (NextVN == OldVN) continue;
+ Stack.push_back(NextVN);
+ }
+ }
+ }
+
+ // Create the new vreg
+ unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
+
+ // Create the new live interval
+ LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
+
+ for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
+ VNsToCopy.end(); OI != OE; ++OI) {
+ VNInfo* OldVN = *OI;
+
+ // Copy the valno over
+ VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
+ NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
+
+ // Remove the valno from the old interval
+ CurrLI->removeValNo(OldVN);
+ }
+
+ // Rewrite defs and uses. This is done in two stages to avoid invalidating
+ // the reg_iterator.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand& MO = I.getOperand();
+ SlotIndex InstrIdx = LIs->getInstructionIndex(&*I);
+
+ if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) ||
+ (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex())))
+ OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+ }
+
+ for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+ OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+ MachineInstr* Inst = I->first;
+ unsigned OpIdx = I->second;
+ MachineOperand& MO = Inst->getOperand(OpIdx);
+ MO.setReg(NewVReg);
+ }
+
+ // Grow the VirtRegMap, since we've created a new vreg.
+ VRM->grow();
+
+ // The renumbered vreg shares a stack slot with the old register.
+ if (IntervalSSMap.count(CurrLI->reg))
+ IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+ NumRenumbers++;
+}
+
+bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ MachineBasicBlock& MBB = *RestorePt->getParent();
+
+ MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+ if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
+ else
+ KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
+
+ if (KillPt == DefMI->getParent()->end())
+ return false;
+
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
+
+ ReconstructLiveInterval(CurrLI);
+ RematIdx = RematIdx.getDefIndex();
+ RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
+
+ ++NumSplits;
+ ++NumRemats;
+ return true;
+}
+
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ // Bail if there are no references in the barrier MBB.
+ if (RefsInMBB.empty())
+ return 0;
+
+ MachineBasicBlock::iterator FoldPt = Barrier;
+ while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
+ !RefsInMBB.count(FoldPt))
+ --FoldPt;
+
+ int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
+ }
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumFolds;
+
+ IntervalSSMap[vreg] = SS;
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ LSs->getVNInfoAllocator());
+ }
+
+ return FMI;
+}
+
+MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
+ return 0;
+
+ // Bail if there are no references in the barrier MBB.
+ if (RefsInMBB.empty())
+ return 0;
+
+ // Can't fold a restore between a call stack setup and teardown.
+ MachineBasicBlock::iterator FoldPt = Barrier;
+
+ // Advance from barrier to call frame teardown.
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ else
+ ++FoldPt;
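+ // FoldPt now points just past the teardown of the barrier's call frame.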
+
+ // Now find the restore point.
+ while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+ if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ }
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+
+ int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumRestoreFolds;
+ }
+
+ return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so it would not cross the barrier that's being processed. Shrink wrap
+/// (minimize) the live interval to the last uses.
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+ DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+ << " result: ");
+
+ CurrLI = LI;
+
+ // Find the live range where the current interval crosses the barrier.
+ LiveInterval::iterator LR =
+ CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
+ VNInfo *ValNo = LR->valno;
+
+ assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
+
+ MachineInstr *DefMI = ValNo->isDefAccurate()
+ ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+
+ // If this would create a new join point, do not split.
+ if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
+ DEBUG(dbgs() << "FAILED (would create a new join point).\n");
+ return false;
+ }
+
+ // Find all references in the barrier mbb.
+ SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineInstr *RefMI = &*I;
+ if (RefMI->getParent() == BarrierMBB)
+ RefsInMBB.insert(RefMI);
+ }
+
+ // Find a point to restore the value after the barrier.
+ MachineBasicBlock::iterator RestorePt =
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
+ if (RestorePt == BarrierMBB->end()) {
+ DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
+ return false;
+ }
+
+ if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
+ DEBUG(dbgs() << "success (remat).\n");
+ return true;
+ }
+
+ // Add a spill either before the barrier or after the definition.
+ MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+ const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+ SlotIndex SpillIndex;
+ MachineInstr *SpillMI = NULL;
+ int SS = -1;
+ if (!ValNo->isDefAccurate()) {
+ // If we don't know where the def is we must split just before the barrier.
+ if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ MachineBasicBlock::iterator SpillPt =
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
+ if (SpillPt == BarrierMBB->begin()) {
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+ return false; // No gap to insert spill.
+ }
+ // Add spill.
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
+ SpillMI = prior(SpillPt);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
+ }
+ } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
+ LIs->getZeroIndex(), SpillIndex, SS)) {
+ // If it's already split, just restore the value. There is no need to spill
+ // the def again.
+ if (!DefMI) {
+ DEBUG(dbgs() << "FAILED (def is dead).\n");
+ return false; // Def is dead. Do nothing.
+ }
+
+ if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ // Check if it's possible to insert a spill after the def MI.
+ MachineBasicBlock::iterator SpillPt;
+ if (DefMBB == BarrierMBB) {
+ // Add spill after the def and the last use before the barrier.
+ SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
+ RefsInMBB);
+ if (SpillPt == DefMBB->begin()) {
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+ return false; // No gap to insert spill.
+ }
+ } else {
+ SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
+ if (SpillPt == DefMBB->end()) {
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+ return false; // No gap to insert spill.
+ }
+ }
+ // Add spill.
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
+ SpillMI = prior(SpillPt);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
+ }
+ }
+
+ // Remember def instruction index to spill index mapping.
+ if (DefMI && SpillMI)
+ Def2SpillMap[ValNo->def] = SpillIndex;
+
+ // Add restore.
+ bool FoldedRestore = false;
+ SlotIndex RestoreIndex;
+ if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
+ BarrierMBB, SS, RefsInMBB)) {
+ RestorePt = LMI;
+ RestoreIndex = LIs->getInstructionIndex(RestorePt);
+ FoldedRestore = true;
+ } else {
+ TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
+ MachineInstr *LoadMI = prior(RestorePt);
+ RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
+ }
+
+ // Update spill stack slot live interval.
+ UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(),
+ RestoreIndex.getDefIndex());
+
+ ReconstructLiveInterval(CurrLI);
+
+ if (!FoldedRestore) {
+ SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RestoreIdx = RestoreIdx.getDefIndex();
+ RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
+ }
+
+ ++NumSplits;
+ DEBUG(dbgs() << "success.\n");
+ return true;
+}
+
+/// SplitRegLiveIntervals - Split all register live intervals that cross the
+/// barrier that's being processed.
+bool
+PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
+ SmallPtrSet<LiveInterval*, 8>& Split) {
+ // First find all the virtual registers whose live intervals are intercepted
+ // by the current barrier.
+ SmallVector<LiveInterval*, 8> Intervals;
+ for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
+ // FIXME: If it's not safe to move any instruction that defines the barrier
+ // register class, then it means there are some special dependencies which
+ // codegen is not modelling. Ignore these barriers for now.
+ if (!TII->isSafeToMoveRegClassDefs(*RC))
+ continue;
+ std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+ for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
+ unsigned Reg = VRs[i];
+ if (!LIs->hasInterval(Reg))
+ continue;
+ LiveInterval *LI = &LIs->getInterval(Reg);
+ if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
+ // Virtual register live interval is intercepted by the barrier. We
+ // should split and shrink wrap its interval if possible.
+ Intervals.push_back(LI);
+ }
+ }
+
+ // Process the affected live intervals.
+ bool Change = false;
+ while (!Intervals.empty()) {
+ if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
+ break;
+ LiveInterval *LI = Intervals.back();
+ Intervals.pop_back();
+ bool result = SplitRegLiveInterval(LI);
+ if (result) Split.insert(LI);
+ Change |= result;
+ }
+
+ return Change;
+}
+
+unsigned PreAllocSplitting::getNumberOfNonSpills(
+ SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex,
+ bool& FeedsTwoAddr) {
+ unsigned NonSpills = 0;
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
+ UI != UE; ++UI) {
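+ // Anything that is not a store of Reg back to the same stack slot
+ // counts as a real (non-spill) use.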
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
+ NonSpills++;
+
+ int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
+ FeedsTwoAddr = true;
+ }
+
+ return NonSpills;
+}
+
+/// removeDeadSpills - After doing splitting, filter through all intervals we've
+/// split, and see if any of the spills are unnecessary. If so, remove them.
+bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
+ bool changed = false;
+
+ // Walk over all of the live intervals that were touched by the splitter,
+ // and see if we can do any DCE and/or folding.
+ for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
+ LE = split.end(); LI != LE; ++LI) {
+ DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
+
+ // First, collect all the uses of the vreg, and group them by their
+ // reaching definition (VNInfo).
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ SlotIndex index = LIs->getInstructionIndex(&*UI);
+ index = index.getUseIndex();
+
+ const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
+ VNUseCount[LR->valno].insert(&*UI);
+ }
+
+ // Now, take the definitions (VNInfo's) one at a time and try to DCE
+ // and/or fold them away.
+ for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
+ VE = (*LI)->vni_end(); VI != VE; ++VI) {
+
+ if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
+ return changed;
+
+ VNInfo* CurrVN = *VI;
+
+ // We don't currently try to handle definitions with PHI kills, because
+ // it would involve processing more than one VNInfo at once.
+ if (CurrVN->hasPHIKill()) continue;
+
+ // We also don't try to handle the results of PHI joins, since there's
+ // no defining instruction to analyze.
+ if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
+
+ // We're only interested in eliminating cruft introduced by the splitter,
+ // which is of the form load-use or load-use-store. First, check that the
+ // definition is a load, and remember what stack slot we loaded it from.
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+ int FrameIndex;
+ if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+ // If the definition has no uses at all, just DCE it.
+ if (VNUseCount[CurrVN].size() == 0) {
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ VNUseCount.erase(CurrVN);
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // Second, get the number of non-store uses of the definition, as well as
+ // a flag indicating whether it feeds into a later two-address definition.
+ bool FeedsTwoAddr = false;
+ unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
+ (*LI)->reg, FrameIndex,
+ FeedsTwoAddr);
+
+ // If there's one non-store use and it doesn't feed a two-addr, then
+ // this is a load-use-store case that we can try to fold.
+ if (NonSpillCount == 1 && !FeedsTwoAddr) {
+ // Start by finding the non-store use MachineInstr.
+ SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ while (UI != VNUseCount[CurrVN].end() &&
+ (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
+ ++UI;
+ if (UI != VNUseCount[CurrVN].end())
+ StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ }
+ if (UI == VNUseCount[CurrVN].end()) continue;
+
+ MachineInstr* use = *UI;
+
+ // Attempt to fold it away!
+ int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
+ if (OpIdx == -1) continue;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+ if (!TII->canFoldMemoryOperand(use, Ops)) continue;
+
+ MachineInstr* NewMI =
+ TII->foldMemoryOperand(*use->getParent()->getParent(),
+ use, Ops, FrameIndex);
+
+ if (!NewMI) continue;
+
+ // Update relevant analyses.
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ LIs->ReplaceMachineInstrInMaps(use, NewMI);
+ (*LI)->removeValNo(CurrVN);
+
+ DefMI->eraseFromParent();
+ MachineBasicBlock* MBB = use->getParent();
+ NewMI = MBB->insert(MBB->erase(use), NewMI);
+ VNUseCount[CurrVN].erase(use);
+
+ // Remove deleted instructions. Note that we need to remove them from
+ // the VNInfo->use map as well, just to be safe.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+ VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+ II != IE; ++II) {
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+ ++VNI)
+ if (VNI->first != CurrVN)
+ VNI->second.erase(*II);
+ LIs->RemoveMachineInstrFromMaps(*II);
+ (*II)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+ if (VI->second.erase(use))
+ VI->second.insert(NewMI);
+
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // If any non-store uses remain at this point, we can't profitably
+ // fold them, so bail.
+ if (NonSpillCount) continue;
+
+ // Otherwise, this is a load-store case, so DCE them.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+ VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+ UI != UE; ++UI) {
+ LIs->RemoveMachineInstrFromMaps(*UI);
+ (*UI)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ NumDeadSpills++;
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+ MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB) {
+ if (DefMBB == BarrierMBB)
+ return false;
+
+ if (LR->valno->hasPHIKill())
+ return false;
+
+ SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+ if (LR->end < MBBEnd)
+ return false;
+
+ MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+ if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+ return true;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ SmallPtrSet<MachineBasicBlock*, 4> Visited;
+ typedef std::pair<MachineBasicBlock*,
+ MachineBasicBlock::succ_iterator> ItPair;
+ SmallVector<ItPair, 4> Stack;
+ Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
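+ // Iterative DFS over the successors of the barrier block. Each stack
+ // entry records a block and the next successor of it left to visit.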
+
+ while (!Stack.empty()) {
+ ItPair P = Stack.back();
+ Stack.pop_back();
+
+ MachineBasicBlock* PredMBB = P.first;
+ MachineBasicBlock::succ_iterator S = P.second;
+
+ if (S == PredMBB->succ_end())
+ continue;
+ else if (Visited.count(*S)) {
+ Stack.push_back(std::make_pair(PredMBB, ++S));
+ continue;
+ } else
+ Stack.push_back(std::make_pair(PredMBB, S+1));
+
+ MachineBasicBlock* MBB = *S;
+ Visited.insert(MBB);
+
+ if (MBB == BarrierMBB)
+ return true;
+
+ MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+ MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+ MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
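+ // Walk up the dominator tree: reaching DefMBB's node before
+ // BarrierMBB's means this block can be reached from the def without
+ // passing through the barrier, so splitting would create a new join.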
+ while (MDTN) {
+ if (MDTN == DefMDTN)
+ return true;
+ else if (MDTN == BarrierMDTN)
+ break;
+ MDTN = MDTN->getIDom();
+ }
+
+ MBBEnd = LIs->getMBBEndIdx(MBB);
+ if (LR->end > MBBEnd)
+ Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+ }
+
+ return false;
+}
+
+
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+ CurrMF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ SIs = &getAnalysis<SlotIndexes>();
+ LIs = &getAnalysis<LiveIntervals>();
+ LSs = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order.
+ MF.RenumberBlocks();
+
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ SmallPtrSet<LiveInterval*, 8> Split;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ BarrierMBB = *DFI;
+ for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
+ E = BarrierMBB->end(); I != E; ++I) {
+ Barrier = &*I;
+ const TargetRegisterClass **BarrierRCs =
+ Barrier->getDesc().getRegClassBarriers();
+ if (!BarrierRCs)
+ continue;
+ BarrierIdx = LIs->getInstructionIndex(Barrier);
+ MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
+ }
+ }
+
+ MadeChange |= removeDeadSpills(Split);
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 0000000..e3df2e4
--- /dev/null
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,275 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+
+using namespace llvm;
+
+char ProcessImplicitDefs::ID = 0;
+static RegisterPass<ProcessImplicitDefs> X("processimpdefs",
+ "Process Implicit Definitions.");
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ const TargetInstrInfo *tii_) {
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg)
+ return true;
+
+ if (OpIdx == 2 && MI->isSubregToReg())
+ return true;
+ if (OpIdx == 1 && MI->isExtractSubreg())
+ return true;
+ return false;
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
+/// there is one implicit_def for each use. Add isUndef marker to
+/// implicit_def defs and their uses.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)fn.getFunction())->getName() << '\n');
+
+ bool Changed = false;
+
+ const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo();
+ MachineRegisterInfo *mri_ = &fn.getRegInfo();
+
+ LiveVariables *lv_ = &getAnalysis<LiveVariables>();
+
+ SmallSet<unsigned, 8> ImpDefRegs;
+ SmallVector<MachineInstr*, 8> ImpDefMIs;
+ SmallVector<MachineInstr*, 4> RUses;
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
+
+ MachineBasicBlock *Entry = fn.begin();
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
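+ // Advance the iterator now; MI may be erased or rewritten below.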
+ if (MI->isImplicitDef()) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+ ImpDefRegs.insert(*SS);
+ }
+ ImpDefMIs.push_back(MI);
+ continue;
+ }
+
+ if (MI->isInsertSubreg()) {
+ MachineOperand &MO = MI->getOperand(2);
+ if (ImpDefRegs.count(MO.getReg())) {
+ // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
+ // This is an identity copy, eliminate it now.
+ if (MO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+ vi.removeKill(MI);
+ }
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ }
+
+ bool ChangedToImpDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!ImpDefRegs.count(Reg))
+ continue;
+ // Use is a copy, just turn it into an implicit_def.
+ if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
+ bool isKill = MO.isKill();
+ MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
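+ // Strip every operand except the def so the instruction becomes a
+ // bare IMPLICIT_DEF.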
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ if (isKill) {
+ ImpDefRegs.erase(Reg);
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
+ ChangedToImpDef = true;
+ Changed = true;
+ break;
+ }
+
+ Changed = true;
+ MO.setIsUndef();
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ // Make sure other uses of Reg in this instruction are also marked undef.
+ for (unsigned j = i+1; j != e; ++j) {
+ MachineOperand &MOJ = MI->getOperand(j);
+ if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ MOJ.setIsUndef();
+ }
+ ImpDefRegs.erase(Reg);
+ }
+ }
+
+ if (ChangedToImpDef) {
+ // Backtrack to process this new implicit_def.
+ --I;
+ } else {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ ImpDefRegs.erase(MO.getReg());
+ }
+ }
+ }
+
+ // Any outstanding liveout implicit_def's?
+ for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = ImpDefMIs[i];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !ImpDefRegs.count(Reg)) {
+ // Delete all "local" implicit_def's. That include those which define
+ // physical registers since they cannot be liveout.
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If there are multiple defs of the same register and at least one
+ // is not an implicit_def, do not insert implicit_def's before the
+ // uses.
+ bool Skip = false;
+ SmallVector<MachineInstr*, 4> DeadImpDefs;
+ for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
+ DE = mri_->def_end(); DI != DE; ++DI) {
+ MachineInstr *DeadImpDef = &*DI;
+ if (!DeadImpDef->isImplicitDef()) {
+ Skip = true;
+ break;
+ }
+ DeadImpDefs.push_back(DeadImpDef);
+ }
+ if (Skip)
+ continue;
+
+ // The only implicit_defs we want to keep are those that are live
+ // out of their block.
+ for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+ DeadImpDefs[j]->eraseFromParent();
+ Changed = true;
+
+ // Process each use instruction once.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineInstr *RMI = &*UI;
+ MachineBasicBlock *RMBB = RMI->getParent();
+ if (RMBB == MBB)
+ continue;
+ if (ModInsts.insert(RMI))
+ RUses.push_back(RMI);
+ }
+
+ for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+ MachineInstr *RMI = RUses[i];
+
+ // Turn a copy use into an implicit_def.
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg) {
+ RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+
+ bool isKill = false;
+ SmallVector<unsigned, 4> Ops;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ Ops.push_back(j);
+ if (RRMO.isKill())
+ isKill = true;
+ }
+ }
+ // Leave the other operands alone.
+ for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+ unsigned OpIdx = Ops[j];
+ RMI->RemoveOperand(OpIdx-j);
+ }
+
+ // Update LiveVariables varinfo if the instruction is a kill.
+ if (isKill) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
+ continue;
+ }
+
+ // Replace Reg with a new vreg that's marked implicit.
+ const TargetRegisterClass* RC = mri_->getRegClass(Reg);
+ unsigned NewVReg = mri_->createVirtualRegister(RC);
+ bool isKill = true;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ RRMO.setReg(NewVReg);
+ RRMO.setIsUndef();
+ if (isKill) {
+ // Only the first operand of NewVReg is marked kill.
+ RRMO.setIsKill();
+ isKill = false;
+ }
+ }
+ }
+ }
+ RUses.clear();
+ ModInsts.clear();
+ }
+ ImpDefRegs.clear();
+ ImpDefMIs.clear();
+ }
+
+ return Changed;
+}
+
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..036f59a
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,920 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+static RegisterPass<PEI>
+X("prologepilog", "Prologue/Epilogue Insertion");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Get MachineModuleInfo so that we can track the construction of the
+ // frame.
+ if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
+ Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+
+ // Calculate the MaxCallFrameSize and HasCalls variables for the function's
+ // frame information. Also eliminates call frame pseudo instructions.
+ calculateCallsInformation(Fn);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - With shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - Without shrink wrapping (the default), place all spills in the
+ // entry block and all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TRI->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters
+ // must be called before this function in order to set the HasCalls
+ // and MaxCallFrameSize variables.
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool HasCalls = FFI->hasCalls();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ HasCalls = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // An InlineAsm might be a call; assume it is, so that the stack frame
+ // gets aligned correctly for calls.
+ HasCalls = true;
+ }
+
+ FFI->setHasCalls(HasCalls);
+ FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // there is no dynamic allocation (therefore referencing frame slots off
+ // sp), leave the pseudo ops alone. We'll eliminate them later.
+ if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
+
+ // Figure out which *callee saved* registers are modified by the current
+ // function, thus needing to be saved and restored in the prolog/epilog.
+ const TargetRegisterClass * const *CSRegClasses =
+ RegInfo->getCalleeSavedRegClasses(&Fn);
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameInfo::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = I->getRegClass();
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+ true, false);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+
+ FFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ FFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ MachineBasicBlock::iterator I;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ CSI[i].getFrameIdx(),
+ CSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ true,
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->getDesc().isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(FrameIdx);
+
+ unsigned Align = FFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
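+ // (e.g. an Offset of 13 with Align 8 rounds up to 16.)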
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(FrameIdx, Offset);
+ Offset += FFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ unsigned MaxAlign = 1;
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address
+ // of the object, which is given by the offset. For a down-growing stack
+ // the offset is negative, so we negate it to get the distance.
+ FixedOff = -FFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += FFI->getObjectSize(i);
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, Offset);
+ Offset += FFI->getObjectSize(i);
+ }
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ if (FFI->getStackProtectorIndex() >= 0)
+ AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ if (FFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!RegInfo->targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
+ Offset += FFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+ // If the frame pointer is eliminated, all frame offsets will be relative
+ // to SP not FP; align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
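+ // Round Offset up to the next StackAlign boundary
+ // (e.g. 40 -> 48 when StackAlign is 16).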
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ FFI->setStackSize(Offset - LocalAreaOffset);
+
+ // Remember the required stack alignment in case targets need it to perform
+ // dynamic stack alignment.
+ if (MaxAlign > FFI->getMaxAlignment())
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ // Add prologue to the function...
+ TRI->emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn())
+ TRI->emitEpilogue(Fn, *I);
+ }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameInfo *TFI = TM.getFrameInfo();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
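+ // Normalize the sign so SPAdj always counts the call frame bytes
+ // currently allocated: a setup increases it and a destroy decreases
+ // it, regardless of the stack growth direction.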
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = llvm::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFI()) {
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ int Value;
+ unsigned VReg =
+ TRI.eliminateFrameIndex(MI, SPAdj, &Value,
+ FrameIndexVirtualScavenging ? NULL : RS);
+ if (VReg) {
+ assert (FrameIndexVirtualScavenging &&
+ "Not scavenging, but virtual returned from "
+ "eliminateFrameIndex()!");
+ FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
+ }
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+
+ assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
+/// findLastUseReg - find the killing use of the specified register within
+/// the instruction range and return an iterator to the killing instruction.
+static MachineBasicBlock::iterator
+findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME,
+ unsigned Reg) {
+ // Scan forward to find the last use of this virtual register
+ for (++I; I != ME; ++I) {
+ MachineInstr *MI = I;
+ bool isDefInsn = false;
+ bool isKillInsn = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isReg()) {
+ unsigned OpReg = MI->getOperand(i).getReg();
+ if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg))
+ continue;
+ assert (OpReg == Reg
+ && "overlapping use of scavenged index register!");
+ // If this is the killing use, we have a candidate.
+ if (MI->getOperand(i).isKill())
+ isKillInsn = true;
+ else if (MI->getOperand(i).isDef())
+ isDefInsn = true;
+ }
+ if (isKillInsn && !isDefInsn)
+ return I;
+ }
+ // If we hit the end of the basic block, there was no kill of
+ // the virtual register, which is wrong.
+ assert (0 && "scavenged index register never killed!");
+ return ME;
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ // FIXME: The logic flow in this function is still too convoluted.
+ // It needs a cleanup refactoring. Do that in preparation for tracking
+ // more than one scratch register value and using ranges to find
+ // available scratch registers.
+ unsigned CurrentVirtReg = 0;
+ unsigned CurrentScratchReg = 0;
+ bool havePrevValue = false;
+ int PrevValue = 0;
+ MachineInstr *PrevLastUseMI = NULL;
+ unsigned PrevLastUseOp = 0;
+ bool trackingCurrentValue = false;
+ int SPAdj = 0;
+ int Value = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+ bool isDefInsn = false;
+ bool isKillInsn = false;
+ bool clobbersScratchReg = false;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // If we have a previous scratch reg, check and see if anything
+ // here kills whatever value is in there.
+ if (Reg == CurrentScratchReg) {
+ if (MO.isUse()) {
+ // Two-address operands implicitly kill
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ clobbersScratchReg = true;
+ } else {
+ assert (MO.isDef());
+ clobbersScratchReg = true;
+ }
+ }
+ continue;
+ }
+ // If this is a def, remember that this insn defines the value.
+ // This lets us properly consider insns which re-use the scratch
+ // register, such as r2 = sub r2, #imm, in the middle of the
+ // scratch range.
+ if (MO.isDef())
+ isDefInsn = true;
+
+ // Have we already allocated a scratch register for this virtual?
+ if (Reg != CurrentVirtReg) {
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // We can't have nested virtual register live ranges because
+ // there's only a guarantee of one scavenged register at a time.
+ assert (CurrentVirtReg == 0 &&
+ "overlapping frame index virtual registers!");
+
+ // If the target gave us information about what's in the register,
+ // we can use that to re-use scratch regs.
+ DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
+ FrameConstantRegMap.find(Reg);
+ trackingCurrentValue = Entry != FrameConstantRegMap.end();
+ if (trackingCurrentValue) {
+ SPAdj = (*Entry).second.second;
+ Value = (*Entry).second.first;
+ } else
+ SPAdj = Value = 0;
+
+ // If the scratch register from the last allocation is still
+ // available, see if the value matches. If it does, just re-use it.
+ if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
+ // FIXME: This assumes that the instructions in the live range
+ // for the virtual register are exclusively for the purpose
+ // of populating the value in the register. That's reasonable
+ // for these frame index registers, but it's still a very, very
+ // strong assumption. rdar://7322732. Better would be to
+ // explicitly check each instruction in the range for references
+ // to the virtual register. Only delete those insns that
+ // touch the virtual register.
+
+ // Find the last use of the new virtual register. Remove all
+ // instructions between here and there, and update the current
+ // instruction to reference the last use insn instead.
+ MachineBasicBlock::iterator LastUseMI =
+ findLastUseReg(I, BB->end(), Reg);
+
+ // Remove all instructions up 'til the last use, since they're
+ // just calculating the value we already have.
+ BB->erase(I, LastUseMI);
+ I = LastUseMI;
+
+ // Extend the live range of the scratch register
+ PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
+ RS->setUsed(CurrentScratchReg);
+ CurrentVirtReg = Reg;
+
+ // We deleted the instruction we were scanning the operands of.
+ // Jump back to the instruction iterator loop. Don't increment
+ // past this instruction since we updated the iterator already.
+ DoIncr = false;
+ break;
+ }
+
+ // Scavenge a new scratch register
+ CurrentVirtReg = Reg;
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ CurrentScratchReg = RS->FindUnusedReg(RC);
+ if (CurrentScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+
+ PrevValue = Value;
+ }
+ // replace this reference to the virtual register with the
+ // scratch register.
+ assert (CurrentScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(CurrentScratchReg);
+
+ if (MI->getOperand(i).isKill()) {
+ isKillInsn = true;
+ PrevLastUseOp = i;
+ PrevLastUseMI = MI;
+ }
+ }
+ }
+ // If this is the last use of the scratch, stop tracking it. The
+ // last use will be a kill operand in an instruction that does
+ // not also define the scratch register.
+ if (isKillInsn && !isDefInsn) {
+ CurrentVirtReg = 0;
+ havePrevValue = trackingCurrentValue;
+ }
+ // Similarly, notice if instruction clobbered the value in the
+ // register we're tracking for possible later reuse. This is noted
+ // above, but enforced here since the value is still live while we
+ // process the rest of the operands of the instruction.
+ if (clobbersScratchReg) {
+ havePrevValue = false;
+ CurrentScratchReg = 0;
+ }
+ if (DoIncr) {
+ RS->forward(I);
+ ++I;
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..931f1eb
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,181 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ // When using the scavenger post-pass to resolve frame reference
+ // materialization registers, maintain a map of the registers to
+ // the constant value and SP adjustment associated with it.
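+ // In each entry, first is the constant value and second is the SP
+ // adjustment at the point of use.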
+ typedef std::pair<int, int> FrameConstantEntry;
+ DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 0000000..7fb3e6e
--- /dev/null
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,115 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+static ManagedStatic<PseudoSourceValue[4]> PSVs;
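+// Indices 0-3 hold the Stack, GOT, JumpTable and ConstantPool singletons,
+// matching the order of PSVNames below.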
+
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &(*PSVs)[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &(*PSVs)[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &(*PSVs)[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &(*PSVs)[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context for the time being to
+// squeak by.
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - *PSVs];
+}
+
+static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
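+ // Lazily create and cache a single value per frame index.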
+ const PseudoSourceValue *&V = (*FSValues)[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return false;
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (this == getStack() ||
+ this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return true;
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ // Negative frame indices are used for special things that don't
+ // appear in LLVM IR. Non-negative indices may be used for things
+ // like static allocas.
+ if (!MFI)
+ return FI >= 0;
+ // Spill slots should not alias others.
+ return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..b655dda
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,199 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+ mul lr, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ ldr r4, [sp, #+52]
+ mla r4, r3, lr, r4
+
+can be:
+
+ mul lr, r4, lr
+ mov r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+ mul r4, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+It also increases the likelihood that the store becomes dead.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+ ...
+ %reg1037 = ADDri %reg1039, 1
+ %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+ Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+ Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+ %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+ ldr r3, [sp, #+4]
+ add r3, r3, #3
+ ldr r2, [sp, #+8]
+ add r2, r2, #2
+ ldr r1, [sp, #+4] <==
+ add r1, r1, #1
+ ldr r0, [sp, #+4]
+ add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, and R3 can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4 @ re-materialized
+load [i + R1]
+...
+R2 = X + 7 @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
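+A sketch of the intended rewrite, reloading %array and recomputing %nth_el
+after the safe point (%array2 and %nth_el2 are illustrative names):
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ %array2 = load { i32, [0 x %obj] }** %array_addr
+ %nth_el2 = getelementptr { i32, [0 x %obj] }* %array2, i32 0, i32 %n
+ store %obj* %new, %obj** %nth_el2
+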
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+ not spill slots.
+2. Reorder objects to fill in gaps between objects.
+ e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
+
+//===---------------------------------------------------------------------===//
+
+The scheduler should be able to sort nearby instructions by their address. For
+example, in an expanded memset sequence it's not uncommon to see code like this:
+
+ movl $0, 4(%rdi)
+ movl $0, 8(%rdi)
+ movl $0, 12(%rdi)
+ movl $0, 0(%rdi)
+
+Each of the stores is independent, and the scheduler is currently making an
+arbitrary decision about the order.
+
+//===---------------------------------------------------------------------===//
+
+Another opportunity in this code is that the $0 could be moved to a register:
+
+ movl $0, 4(%rdi)
+ movl $0, 8(%rdi)
+ movl $0, 12(%rdi)
+ movl $0, 0(%rdi)
+
+This would save substantial code size, especially for longer sequences like
+this. It would be easy to have a rule telling isel to avoid matching MOV32mi
+if the immediate has more than some fixed number of uses. It's more involved
+to teach the register allocator how to do late folding to recover from
+excessive register pressure.
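+
+For instance, with the zero hoisted into a register the sequence might become:
+
+ xorl %eax, %eax
+ movl %eax, 4(%rdi)
+ movl %eax, 8(%rdi)
+ movl %eax, 12(%rdi)
+ movl %eax, 0(%rdi)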
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..8e44a57
--- /dev/null
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1512 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+ cl::desc("Pre-register allocation live interval splitting"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+ cl::desc("Attempt trivial coalescing of interval ends"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ // When we allocate a register, add it to a fixed-size queue of
+ // registers to skip in subsequent allocations. This trades a small
+ // amount of register pressure and increased spills for flexibility in
+ // the post-pass scheduler.
+ //
+ // Note that the number of registers used for reloading spills
+ // will be one greater than the value of this option.
+ //
+ // One big limitation of this is that it doesn't differentiate between
+ // different register classes. So on x86-64, if there is xmm register
+ // pressure, it can cause fewer GPRs to be held in the queue.
+ static cl::opt<unsigned>
+ NumRecentlyUsedRegs("linearscan-skip-count",
+ cl::desc("Number of registers for linearscan to remember to skip."),
+ cl::init(0),
+ cl::Hidden);
+
+ struct RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(&ID) {
+ // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0)
+ RecentRegs.resize(NumRecentlyUsedRegs, 0);
+ RecentNext = RecentRegs.begin();
+ }
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ LiveIntervals* li_;
+ LiveStacks* ls_;
+ const MachineLoopInfo *loopInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
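+ // Min-heap: the interval with the earliest start point is always on top.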
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ // The queue of recently-used registers.
+ SmallVector<unsigned, 4> RecentRegs;
+ SmallVector<unsigned, 4>::iterator RecentNext;
+
+ // Record that we just picked this register.
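+ // The fixed-size queue acts as a circular buffer: the oldest entry
+ // is overwritten and the cursor wraps around at the end.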
+ void recordRecentlyUsed(unsigned reg) {
+ assert(reg != 0 && "Recently used register is NOREG!");
+ if (!RecentRegs.empty()) {
+ *RecentNext++ = reg;
+ if (RecentNext == RecentRegs.end())
+ RecentNext = RecentRegs.begin();
+ }
+ }
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ if (PreSplitIntervals)
+ AU.addRequiredID(PreAllocSplittingID);
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ // Determine if we skip this register due to its being recently used.
+ bool isRecentlyUsed(unsigned reg) const {
+ return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
+ RecentRegs.end();
+ }
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(SlotIndex CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(SlotIndex CurPoint);
+
+ /// hasNextReloadInterval - Return the next live interval that's being
+ /// defined by a reload from the same stack slot as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+ /// try to allocate the definition the same register as the source register
+ /// if the register is not defined during the live time of the interval. This
+ /// eliminates a copy. This is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ dbgs() << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ llvm_unreachable(0);
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// assignVirt2StackSlot - assigns this virtual register to a
+ /// stack slot. returns the stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ DEBUG({
+ if (str)
+ dbgs() << str << " intervals:\n";
+
+ for (; i != e; ++i) {
+ dbgs() << "\t" << *i->first << " -> ";
+
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg))
+ reg = vrm_->getPhys(reg);
+
+ dbgs() << tri_->getName(reg) << '\n';
+ }
+ });
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+static RegisterPass<RALinScan>
+X("linearscan-regalloc", "Linear Scan Register Allocator");
+
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// to allocate the definition the same register as the source register if the
+/// register is not defined during the live time of the interval. If the interval is
+/// killed by a copy, try to use the destination register. This eliminates a
+/// copy. This is used to coalesce copies which were not coalesced away before
+/// allocation either due to dest and src being in different register classes or
+/// because the coalescer was overly conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ // We cannot handle complicated live ranges. Simple linear stuff only.
+ if (cur.ranges.size() != 1)
+ return Reg;
+
+ const LiveRange &range = cur.ranges.front();
+
+ VNInfo *vni = range.valno;
+ if (vni->isUnused())
+ return Reg;
+
+ unsigned CandReg;
+ {
+ MachineInstr *CopyMI;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (vni->def != SlotIndex() && vni->isDefAccurate() &&
+ (CopyMI = li_->getInstructionFromIndex(vni->def)) &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ // Defined by a copy, try to extend SrcReg forward
+ CandReg = SrcReg;
+ else if (TrivCoalesceEnds &&
+ (CopyMI =
+ li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ cur.reg == SrcReg)
+ // Only used by a copy, try to extend DstReg backwards
+ CandReg = DstReg;
+ else
+ return Reg;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+ if (!vrm_->isAssignedReg(CandReg))
+ return Reg;
+ CandReg = vrm_->getPhys(CandReg);
+ }
+ if (Reg == CandReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(CandReg))
+ return Reg;
+
+ if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+ return Reg;
+
+ // Try to coalesce.
+ DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ << '\n');
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, CandReg);
+
+ ++NumCoalesce;
+ return CandReg;
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ ls_ = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_));
+
+ initIntervalSets();
+
+ linearScan();
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ if (!i->second->empty()) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ }
+ } else {
+ if (i->second->empty()) {
+ assignRegOrStackSlotAtInterval(i->second);
+ }
+ else
+ unhandled_.push(i->second);
+ }
+ }
+}
+
+void RALinScan::linearScan() {
+ // linear scan algorithm
+ DEBUG({
+ dbgs() << "********** LINEAR SCAN **********\n"
+ << "********** Function: "
+ << mf_->getFunction()->getName() << '\n';
+ printIntervals("fixed", fixed_.begin(), fixed_.end());
+ });
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
+
+ assert(!cur->empty() && "Empty interval in unhandled set.");
+
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+
+ // Allocating a virtual register. Try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG({
+ printIntervals("active", active_.begin(), active_.end());
+ printIntervals("inactive", inactive_.begin(), inactive_.end());
+ });
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG({
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ dbgs() << "\tinterval " << *i->first << " expired\n";
+ });
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+ // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
+ LiveInMBBs[i]->addLiveIn(Reg);
+ }
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DEBUG(dbgs() << *vrm_);
+
+ // Look for physical registers that end up not being allocated even though
+ // the register allocator had to spill other registers in their register
+ // class.
+ if (ls_->getNumIntervals() == 0)
+ return;
+ if (!vrm_->FindUnusedRegisters(li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing active intervals:\n");
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move non-overlapping intervals to the inactive list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // Move re-activated intervals to the active list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by the given weight.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If an alias is a super-register and that super-register is in the
+ // register class we are trying to allocate, then add the weight to all
+ // sub-registers of the super-register, even if they are not aliases.
+ // E.g. when allocating for GR32 and bh is not used, also update bl's
+ // spill weight; otherwise bl would be chosen as a spill candidate even
+ // though spilling bh does not make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// addStackInterval - Create a LiveInterval for the stack slot if the
+/// specified live interval has been spilled.
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+ LiveIntervals *li_,
+ MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+ int SS = vrm_.getStackSlot(cur->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+ VNInfo *VNI;
+ if (SI.hasAtLeastOneValue())
+ VNI = SI.getValNumInfo(0);
+ else
+ VNI = SI.getNextValue(SlotIndex(), 0, false,
+ ls_->getVNInfoAllocator());
+
+ LiveInterval &RI = li_->getInterval(cur->reg);
+ // FIXME: This may be overly conservative.
+ SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between the cur
+/// live interval and the defs and uses of Reg, weighted by loop depth.
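+/// For example, a single conflicting def or use inside a doubly nested loop
+/// (loop depth 2) contributes powf(10.0f, 2) == 100.0 to the result.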
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ const MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += powf(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+ // We have figured out the *best* register to spill. But there are other
+ // registers that are nearly as good (their spill weights are close, as
+ // defined by weightsAreClose). Spill the one that has the fewest defs and
+ // uses that conflict with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DEBUG({
+ dbgs() << "\tConsidering " << NumCands << " candidates: ";
+ for (unsigned i = 0; i != NumCands; ++i)
+ dbgs() << tri_->getName(Candidates[i].first) << " ";
+ dbgs() << "\n";
+ });
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
+ unsigned BestCandidate = 0;
+ float MinConflicts = Conflicts[0];
+ for (unsigned i = 1; i != NumCands; ++i) {
+ if (Conflicts[i] < MinConflicts) {
+ BestCandidate = i;
+ MinConflicts = Conflicts[i];
+ }
+ }
+
+ std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+ std::back_inserter(SpillIntervals));
+}
+
+namespace {
+ struct WeightCompare {
+ private:
+ const RALinScan &Allocator;
+
+ public:
+ WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+
+ typedef std::pair<unsigned, float> RegWeightPair;
+ bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+ return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
+ }
+ };
+}
+
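+// weightsAreClose - When the NewHeuristic option is enabled, treat two spill
+// weights as interchangeable if they differ by at most 0.02 absolutely or by
+// at most 5% relatively; e.g. 10.4 counts as close to 10.0 (4% apart).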
+static bool weightsAreClose(float w1, float w2) {
+ if (!NewHeuristic)
+ return false;
+
+ float diff = w1 - w2;
+ if (diff <= 0.02f) // Within 0.02f
+ return true;
+ return (diff / w2) <= 0.05f; // Within 5%.
+}
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
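+// DowngradeRegister - Record Reg and all of its aliases as "downgraded",
+// i.e. held in reserve for an upcoming reload of li's stack slot, so that
+// consecutive reloads of the same slot can share one physical register.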
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ bool isNew = DowngradedRegs.insert(Reg);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, Reg));
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
+ isNew = DowngradedRegs.insert(*AS);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginIndex() < B->beginIndex();
+ }
+ };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
+ DEBUG(dbgs() << "\tallocating current interval: ");
+
+ // This is an implicitly defined live interval, just assign any register.
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ if (cur->empty()) {
+ unsigned physReg = vrm_->getRegAllocPref(cur->reg);
+ if (!physReg)
+ physReg = *RC->allocation_order_begin(*mf_);
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ // Note the register is not really in use.
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ return;
+ }
+
+ backUpRegUses();
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ SlotIndex StartPosition = cur->beginIndex();
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ // If start of this live interval is defined by a move instruction and its
+ // source is assigned a physical register that is compatible with the target
+ // register class, then we should try to assign it the same register.
+ // This can happen when the move is from a larger register class to a smaller
+ // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
+ VNInfo *vni = cur->begin()->valno;
+ if ((vni->def != SlotIndex()) && !vni->isUnused() &&
+ vni->isDefAccurate()) {
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (CopyMI &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
+ }
+ }
+ }
+ }
+
+ // For every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ unsigned BestPhysReg = physReg;
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ SmallSet<unsigned, 8> RegAliases;
+ for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // regUse_ so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated regUse_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ restoreRegUses();
+
+ // If we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
+ DowngradeRegister(cur, physReg);
+ }
+ return;
+ }
+ DEBUG(dbgs() << "no free registers\n");
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+ // For each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+ }
+
+ DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+ unsigned minReg = 0;
+
+ bool Found = false;
+ std::vector<std::pair<unsigned,float> > RegsWeights;
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ float regWeight = SpillWeights[reg];
+ // Skip recently allocated registers.
+ if (minWeight > regWeight && !isRecentlyUsed(reg))
+ Found = true;
+ RegsWeights.push_back(std::make_pair(reg, regWeight));
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!Found) {
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ // No need to worry about whether the alias register's size is smaller
+ // than RC's register size; we are going to spill all registers that
+ // alias it anyway.
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+ RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+ }
+ }
+
+ // Sort all potential spill candidates by weight.
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+ // spillPhysRegAroundRegDefsUses may have invalidated iterator stored
+ // in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ assert(false && "Ran out of registers during register allocation!");
+ llvm_report_error("Ran out of registers during register allocation!");
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
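+ // Runners-up are kept only while their weights remain close to the
+ // minimum (see weightsAreClose); otherwise only the best register is
+ // considered.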
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
+
+ DEBUG({
+ dbgs() << "\t\tregister(s) with min weight(s): ";
+
+ for (unsigned i = 0; i != LastCandidate; ++i)
+ dbgs() << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n";
+ });
+
+ // If the current interval has the minimum weight, spill it, add any
+ // newly created intervals back to unhandled, and restart the linear
+ // scan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> added;
+
+ added = spiller_->spill(cur, spillIs);
+
+ std::sort(added.begin(), added.end(), LISorter());
+ addStackInterval(cur, ls_, li_, mri_, *vrm_);
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
+ // point.
+ // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
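+ // For example, if two reloads of the same stack slot appear in one block,
+ // the first reload's vreg is mapped to the second's, so that once the
+ // first is assigned a register the second can be hinted to the same one.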
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
+
+ ++NumBacktracks;
+
+ // Push the current interval back to unhandled since we are going
+ // to re-run at least this iteration. Since we didn't modify it, it
+ // should go right back to the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+ // We spill all intervals aliasing the register with the minimum weight,
+ // roll back to the interval with the earliest start point, and let the
+ // linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to rollback properly)
+ SmallSet<unsigned, 8> spilled;
+
+ // The earliest start of a spilled interval indicates how far back in
+ // handled_ we need to roll.
+ assert(!spillIs.empty() && "No spill intervals?");
+ SlotIndex earliestStart = spillIs[0]->beginIndex();
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with
+ // the current interval, as the rest do not affect its allocation. We also
+ // keep track of the earliest start of all spilled live intervals since
+ // this will mark our rollback point.
+ std::vector<LiveInterval*> added;
+ while (!spillIs.empty()) {
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
+ if (sli->beginIndex() < earliestStart)
+ earliestStart = sli->beginIndex();
+
+ std::vector<LiveInterval*> newIs;
+ newIs = spiller_->spill(sli, spillIs, &earliestStart);
+ addStackInterval(sli, ls_, li_, mri_, *vrm_);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(sli->reg);
+ }
+
+ DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+ // If this interval starts before earliestStart, we are done.
+ if (!i->empty() && i->beginIndex() < earliestStart)
+ break;
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+ // If the interval has a preference, it must be defined by a copy. Clear
+ // the preference now since the source interval's allocation may have
+ // been undone as well.
+ mri_->setRegAllocationHint(i->reg, 0, 0);
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+ // Scan the rest and undo each interval that expired after earliestStart,
+ // inserting it in active (the next iteration of the algorithm will
+ // put it in inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginIndex())) {
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+ // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
+
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ TargetRegisterClass::iterator I, E;
+ tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
+ assert(I != E && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ // Skip recently allocated registers.
+ if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+ // Remember what register we picked so we can skip it next time.
+ if (FreeReg != 0) recordRecentlyUsed(FreeReg);
+ return FreeReg;
+ }
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ // Remember what register we picked so we can skip it next time.
+ recordRecentlyUsed(FreeReg);
+
+ return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ SmallVector<unsigned, 256> inactiveCounts;
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ if (inactiveCounts.size() <= reg)
+ inactiveCounts.resize(reg+1);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+ // If copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
+ }
+
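+ // First try to find a register while skipping the "downgraded" ones,
+ // i.e. registers being held for an upcoming reload; if that fails,
+ // consider every register in the class.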
+ if (!DowngradedRegs.empty()) {
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
+ }
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
new file mode 100644
index 0000000..4d2e3a3
--- /dev/null
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -0,0 +1,1132 @@
+//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+static RegisterRegAlloc
+ localRegAlloc("local", "local register allocator",
+ createLocalRegisterAllocator);
+
+namespace {
+ class RALocal : public MachineFunctionPass {
+ public:
+ static char ID;
+ RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Virt2PhysRegMap - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ // PhysRegsUsed - This array is effectively a map, containing entries for
+ // each physical register that currently has a value (ie, it is in
+ // Virt2PhysRegMap). The value mapped to is the virtual register
+ // corresponding to the physical register (the inverse of the
+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ // because it is used by a future instruction, and to -2 if it is not
+ // allocatable. If the entry for a physical register is -1, then the
+ // physical register is "not in the map".
+ //
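+ // In short: -2 = not allocatable, -1 = free (not in the map), 0 = pinned
+ // for a future use, and >0 = the virtual register currently held.
+ //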
+ std::vector<int> PhysRegsUsed;
+
+ // PhysRegsUseOrder - This contains a list of the physical registers that
+ // currently have a virtual register value in them. This list provides an
+ // ordering of registers, imposing a reallocation order. This list is only
+ // used if all registers are allocated and we have to spill one, in which
+ // case we spill the least recently used register. Entries at the front of
+ // the list are the least recently used registers, entries at the back are
+ // the most recently used.
+ //
+ std::vector<unsigned> PhysRegsUseOrder;
+
+ // Virt2LastUseMap - This maps each virtual register to its last use
+ // (MachineInstr*, operand index pair).
+ IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+ Virt2LastUseMap;
+
+ std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ return Virt2LastUseMap[Reg];
+ }
+
+ // VirtRegModified - This bitset contains information about which virtual
+ // registers need to be spilled back to memory when their registers are
+ // scavenged. If a virtual register has simply been rematerialized, there
+ // is no reason to spill it to memory when we need the register back.
+ //
+ BitVector VirtRegModified;
+
+ // UsedInMultipleBlocks - Tracks whether a particular register is used in
+ // more than one block.
+ BitVector UsedInMultipleBlocks;
+
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (Val)
+ VirtRegModified.set(Reg);
+ else
+ VirtRegModified.reset(Reg);
+ }
+
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ void AddToPhysRegsUseOrder(unsigned Reg) {
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+ PhysRegsUseOrder.push_back(Reg);
+ }
+
+ void MarkPhysRegRecentlyUsed(unsigned Reg) {
+ if (PhysRegsUseOrder.empty() ||
+ PhysRegsUseOrder.back() == Reg) return; // Already most recently used
+
+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+ if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+ // Add it to the end of the list
+ PhysRegsUseOrder.push_back(RegMatch);
+ if (RegMatch == Reg)
+ return; // Found an exact match, exit early
+ }
+ }
+
+ public:
+ virtual const char *getPassName() const {
+ return "Local Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// getReg - Find a physical register to hold the specified virtual
+ /// register. If all compatible physical registers are used, this method
+ /// spills the last used virtual register to the stack, and uses that
+ /// register. If NoFree is true, the caller knows there isn't a free
+ /// register, so getFreeReg() is not called.
+ unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg, bool NoFree = false);
+
+ /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register: PhysReg if that is an available physical
+ /// register, otherwise any physical register of the right class.
+ /// If register pressure is high, and it is possible, it tries to fold the
+ /// load of the virtual register into the instruction itself. It avoids
+ /// doing this if register pressure is low to improve the chance that
+ /// subsequent instructions can use the reloaded value. This method
+ /// returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
+ unsigned PhysReg);
+
+ /// ComputeLocalLiveness - Computes liveness of registers within a basic
+ /// block, setting the killed/dead flags as appropriate.
+ void ComputeLocalLiveness(MachineBasicBlock& MBB);
+
+ void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+ unsigned PhysReg);
+ };
+ char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location where VirtReg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
+
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ assert(VirtReg && "Spilling a physical register is illegal!"
+ " Must not have appropriate kill for the register or use exists beyond"
+ " the intended one.");
+ DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
+ << " containing %reg" << VirtReg);
+
+ if (!isVirtRegModified(VirtReg)) {
+ DEBUG(dbgs() << " which has not been modified, so no store necessary!");
+ std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+ if (LastUse.first)
+ LastUse.first->getOperand(LastUse.second).setIsKill();
+ } else {
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FrameIndex);
+ // If the instruction reads the register that's spilled (e.g. this can
+ // happen if it is a move to a physical register), then the spill
+ // instruction is not a kill.
+ bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DEBUG(dbgs() << '\n');
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+ AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg, bool NoFree) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+ // If we didn't find an unused register, scavenge one now!
+ if (PhysReg == 0) {
+ assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+ // Loop over all of the preallocated registers from the least recently used
+ // to the most recently used. When we find one that is capable of holding
+ // our register, use it.
+ for (unsigned i = 0; PhysReg == 0; ++i) {
+ assert(i != PhysRegsUseOrder.size() &&
+ "Couldn't find a register of the appropriate class!");
+
+ unsigned R = PhysRegsUseOrder[i];
+
+ // We can only use this register if it holds a virtual register (ie, it
+ // can be spilled). Do not use it if it is an explicitly allocated
+ // physical register!
+ assert(PhysRegsUsed[R] != -1 &&
+ "PhysReg in PhysRegsUseOrder, but is not allocated?");
+ if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+ // If the current register is compatible, use it.
+ if (RC->contains(R)) {
+ PhysReg = R;
+ break;
+ } else {
+ // If one of the registers aliased to the current register is
+ // compatible, use it.
+ for (const unsigned *AliasIt = TRI->getAliasSet(R);
+ *AliasIt; ++AliasIt) {
+ if (RC->contains(*AliasIt) &&
+ // If this is pinned down for some reason, don't use it. For
+ // example, if CL is pinned, and we run across CH, don't use
+ // CH as justification for using scavenging ECX (which will
+ // fail).
+ PhysRegsUsed[*AliasIt] != 0 &&
+
+ // Make sure the register is allocatable. Don't allocate SIL on
+ // x86-32.
+ PhysRegsUsed[*AliasIt] != -2) {
+ PhysReg = *AliasIt; // Take an aliased register
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ assert(PhysReg && "Physical register not assigned!?!?");
+
+ // At this point PhysReg is the least recently used register (or an alias
+ // of one) of a compatible register class. Spill it to memory and reap its
+ // remains.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // Now that we know which register we need to assign this to, do it now!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register: PhysReg if that is an available physical register, otherwise any
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns
+/// the modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum,
+ SmallSet<unsigned, 4> &ReloadedRegs,
+ unsigned PhysReg) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available, just update the instruction
+ // and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MarkPhysRegRecentlyUsed(PR); // Already have this value available!
+ MI->getOperand(OpNum).setReg(PR); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ return MI;
+ }
+
+ // Otherwise, we need to fold it into the current instruction, or reload it.
+ // If we have registers available to hold the value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ // If we already have a PhysReg (this happens when the instruction is a
+ // reg-to-reg copy with a PhysReg destination) use that.
+ if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
+ !isPhysRegAvailable(PhysReg))
+ PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // Register is available, allocate it!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // No registers available.
+ // Force some poor hapless value out of the register file to
+ // make room for the new register, and reload it.
+ PhysReg = getReg(MBB, MI, VirtReg, true);
+ }
+
+ markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+
+ DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
+ << TRI->getName(PhysReg) << "\n");
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+ if (!ReloadedRegs.insert(PhysReg)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, TM);
+ }
+ llvm_report_error(Msg.str());
+ }
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (!ReloadedRegs.insert(*SubRegs)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, TM);
+ }
+ llvm_report_error(Msg.str());
+ }
+ }
+
+ return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+// precedes - Helper function to determine whether MachineInstr A
+// precedes MachineInstr B within the same MBB.
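+// Note that this walks the MBB from the beginning, so it is linear in the
+// size of the block.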
+static bool precedes(MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock::iterator I = A->getParent()->begin();
+ while (I != A->getParent()->end()) {
+ if (I == A)
+ return true;
+ else if (I == B)
+ return false;
+
+ ++I;
+ }
+
+ return false;
+}
+
+/// ComputeLocalLiveness - Computes liveness of registers within a basic
+/// block, setting the killed/dead flags as appropriate.
+void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
+ MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ // Keep track of the most recently seen previous use or def of each reg,
+ // so that we can update them with dead/kill markers.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+ // Uses don't trigger any flags, but we need to save
+ // them for later. Also, we have to process these
+ // _before_ processing the defs, since an instr
+ // uses regs before it defs them.
+ if (MO.isReg() && MO.getReg() && MO.isUse()) {
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+ const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
+ if (Aliases) {
+ while (*Aliases) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ alias = LastUseDef.find(*Aliases);
+
+ if (alias != LastUseDef.end() && alias->second.first != I)
+ LastUseDef[*Aliases] = std::make_pair(I, i);
+
+ ++Aliases;
+ }
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+ // Defs other than 2-addr redefs _do_ trigger flag changes:
+ // - A def followed by a def is dead
+ // - A use followed by a def is a kill
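+ // E.g. two defs of a reg with no intervening use mark the first def as
+ // dead; a use followed by a def marks that use as a kill.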
+ if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ last = LastUseDef.find(MO.getReg());
+ if (last != LastUseDef.end()) {
+ // Check if this is a two address instruction. If so, then
+ // the def does not kill the use.
+ if (last->second.first == I &&
+ I->isRegTiedToUseOperand(i))
+ continue;
+
+ MachineOperand& lastUD =
+ last->second.first->getOperand(last->second.second);
+ if (lastUD.isDef())
+ lastUD.setIsDead(true);
+ else
+ lastUD.setIsKill(true);
+ }
+
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+ }
+ }
+ }
+
+ // Live-out registers contain the function's return values, so we
+ // need to make sure they are alive at return time.
+ if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
+ MachineInstr* Ret = &MBB.back();
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I)
+ if (!Ret->readsRegister(*I)) {
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
+ }
+ }
+
+ // Finally, loop over the final use/def of each reg
+ // in the block and determine if it is dead.
+ for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
+ MachineInstr* MI = I->second.first;
+ unsigned idx = I->second.second;
+ MachineOperand& MO = MI->getOperand(idx);
+
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
+
+ // A crude approximation of "live-out" calculation
+ bool usedOutsideBlock = isPhysReg ? false :
+ UsedInMultipleBlocks.test(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ if (!isPhysReg && !usedOutsideBlock)
+ for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+ UE = MRI.reg_end(); UI != UE; ++UI)
+ // Two cases:
+ // - used in another block
+ // - used in the same block before it is defined (loop)
+ if (UI->getParent() != &MBB ||
+ (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+ UsedInMultipleBlocks.set(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ usedOutsideBlock = true;
+ break;
+ }
+
+ // Physical registers and those that are not live-out of the block
+ // are killed/dead at their last use/def within this block.
+ if (isPhysReg || !usedOutsideBlock) {
+ if (MO.isUse()) {
+ // Don't mark uses that are tied to defs as kills.
+ if (!MI->isRegTiedToDefOperand(idx))
+ MO.setIsKill(true);
+ } else
+ MO.setIsDead(true);
+ }
+ }
+}
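To make the flag computation concrete, here is a minimal sketch of the bookkeeping (virtual register numbers hypothetical). In a block such as

  %reg1024 = ...            ; def, recorded in LastUseDef
  ...      = use %reg1024   ; use, recorded in LastUseDef
  %reg1024 = ...            ; redef: the recorded use gets <kill>

the second def marks the intervening use as a kill; had the first def been followed directly by the redef, that def would have been marked <dead> instead. If %reg1024 never escapes the block, the final loop over LastUseDef also marks its last use/def as <kill>/<dead>.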
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+
+ DEBUG({
+ const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB)
+ dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+ });
+
+ // Add live-in registers as active.
+ for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+ E = MBB.livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+
+ ComputeLocalLiveness(MBB);
+
+ // Now sequentially allocate each instruction in the MBB.
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\nStarting RegAlloc of: " << *MI;
+ dbgs() << " Regs have values: ";
+ for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ dbgs() << "[" << TRI->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ dbgs() << '\n';
+ });
+
+ // Determine whether this is a copy instruction. The cases where the
+ // source or destination is a physical register are handled specially.
+ unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
+ unsigned SrcCopyPhysReg = 0U;
+ bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+ SrcCopySubReg, DstCopySubReg);
+ if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
+ SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
+
+ // Loop over the implicit uses, making sure that they are at the head of the
+ // use order list, so they don't get reallocated.
+ if (TID.ImplicitUses) {
+ for (const unsigned *ImplicitUses = TID.ImplicitUses;
+ *ImplicitUses; ++ImplicitUses)
+ MarkPhysRegRecentlyUsed(*ImplicitUses);
+ }
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // An implicit kill that is not part of a read/mod/write update of
+ // a super-register (the extra kill added when a sub-register is
+ // defined) is a real kill and must be recorded.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // If any physical regs are earlyclobber, spill any value they might
+ // have in them, then mark them unallocatable.
+ // If any virtual regs are earlyclobber, allocate them now (before
+ // freeing inputs that are killed).
+ if (MI->isInlineAsm()) {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
+ MO.getReg()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) =
+ std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the earlyclobber register
+ } else {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // If a DBG_VALUE says something is located in a spilled register,
+ // change the DBG_VALUE to be undef, which prevents the register
+ // from being reloaded here. Doing that would change the generated
+ // code, unless another use immediately follows this instruction.
+ if (MI->isDebugValue() &&
+ MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
+ unsigned VirtReg = MI->getOperand(0).getReg();
+ if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ !getVirt2PhysRegMapSlot(VirtReg))
+ MI->getOperand(0).setReg(0U);
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+ // physical register is referenced by the instruction, that it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ SmallSet<unsigned, 4> ReloadedRegs;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // Here we are looking only for use operands (never def&use).
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
+ isCopy ? DstCopyReg : 0);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ DEBUG(dbgs() << " Last use of "
+ << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+ <<"], removing it from live set\n");
+ removePhysReg(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ AddToPhysRegsUseOrder(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
+ // If this is a copy try to reuse the input as the output;
+ // that will make the copy go away.
+ // If this is a copy, the source reg is a phys reg, and
+ // that reg is available, use that phys reg for DestPhysReg.
+ // If this is a copy, the source reg is a virtual reg, and
+ // the phys reg that was assigned to that virtual reg is now
+ // available, use that phys reg for DestPhysReg. (If it's now
+ // available that means this was the last use of the source.)
+ if (isCopy &&
+ TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
+ isPhysRegAvailable(SrcCopyReg)) {
+ DestPhysReg = SrcCopyReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else if (isCopy &&
+ TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
+ SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
+ MF->getRegInfo().getRegClass(DestVirtReg)->
+ contains(SrcCopyPhysReg)) {
+ DestPhysReg = SrcCopyPhysReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ }
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n");
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it. (Except that if
+ // the copy is dead, it must be kept to avoid messing up liveness info for
+ // the register scavenger. See pr4100.)
+ if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+ SrcCopySubReg, DstCopySubReg) &&
+ SrcCopyReg == DstCopyReg && DeadDefs.empty())
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+
+#if 0
+ // This checking code is very expensive.
+ bool AllOk = true;
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (unsigned PR = Virt2PhysRegMap[i]) {
+ cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+ AllOk = false;
+ }
+ assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+ // Clear any physical registers that appear live at the end of the
+ // basic block but do not hold any virtual registers, e.g. the stack
+ // pointer.
+ PhysRegsUseOrder.clear();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "Machine Function\n");
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = TRI->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocatable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ StackSlotForVirtReg.grow(LastVirtReg);
+ Virt2PhysRegMap.grow(LastVirtReg);
+ Virt2LastUseMap.grow(LastVirtReg);
+ VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+ UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ UsedInMultipleBlocks.clear();
+ Virt2PhysRegMap.clear();
+ Virt2LastUseMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+ return new RALocal();
+}
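Assuming this allocator is registered with the -regalloc option under the name "local" (as in LLVM releases of this era), it can be exercised directly from the command line, e.g.:

  llc -regalloc=local -o out.s in.bc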
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..2701faf
--- /dev/null
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,918 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "PBQP/HeuristicSolver.h"
+#include "PBQP/Graph.h"
+#include "PBQP/Heuristics/Briggs.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator.",
+ llvm::createPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+ ///
+ /// PBQP based allocators solve the register allocation problem by mapping
+ /// register allocation problems to Partitioned Boolean Quadratic
+ /// Programming problems.
+ class PBQPRegAlloc : public MachineFunctionPass {
+ public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ PBQPRegAlloc() : MachineFunctionPass(&ID) {}
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::set<unsigned> RegSet;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+
+ typedef std::set<LiveInterval*> LiveIntervalSet;
+
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ LI2NodeMap li2Node;
+ Node2LIMap node2LI;
+ AllowedSetMap allowedSets;
+ LiveIntervalSet vregIntervalsToAlloc,
+ emptyVRegIntervals;
+ NodeVector problemNodes;
+
+
+ /// Builds a PBQP cost vector.
+ template <typename RegContainer>
+ PBQP::Vector buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &coalesces,
+ PBQP::PBQPNum spillCost) const;
+
+ /// \brief Builds a PBQP interference matrix.
+ ///
+ /// @return Either a pointer to a non-zero PBQP matrix representing the
+ /// allocation option costs, or a null pointer for a zero matrix.
+ ///
+ /// Expects allowed sets for two interfering LiveIntervals. These allowed
+ /// sets should contain only allocable registers from the LiveInterval's
+ /// register class, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2) const;
+
+ /// \brief Builds a PBQP coalescing matrix.
+ ///
+ /// Expects allowed sets for two potentially coalescable LiveIntervals,
+ /// and an estimated benefit due to coalescing. The allowed sets should
+ /// contain only allocable registers from the LiveInterval's register
+ /// classes, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2,
+ PBQP::PBQPNum cBenefit) const;
+
+ /// \brief Finds coalescing opportunities and returns them as a map.
+ ///
+ /// Any entries in the map are guaranteed coalescable, even if their
+ /// corresponding live intervals overlap.
+ CoalesceMap findCoalesces();
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ /// \brief Constructs a PBQP problem representation of the register
+ /// allocation problem for this function.
+ ///
+ /// @return a PBQP solver object for the register allocation problem.
+ PBQP::Graph constructPBQPProblem();
+
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+ void addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri);
+
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+ };
+
+ char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &coalesces,
+ PBQP::PBQPNum spillCost) const {
+
+ typedef typename RegContainer::const_iterator AllowedItr;
+
+ // Allocate vector. Additional element (0th) used for spill option
+ PBQP::Vector v(allowed.size() + 1, 0);
+
+ v[0] = spillCost;
+
+ // Iterate over the allowed registers inserting coalesce benefits if there
+ // are any.
+ unsigned ai = 0;
+ for (AllowedItr itr = allowed.begin(), end = allowed.end();
+ itr != end; ++itr, ++ai) {
+
+ unsigned pReg = *itr;
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(vReg, pReg));
+
+ // No coalesce - on to the next preg.
+ if (cmItr == coalesces.end())
+ continue;
+
+ // We have a coalesce - insert the benefit.
+ v[ai + 1] = -cmItr->second;
+ }
+
+ return v;
+}
+
+template <typename RegContainer>
+PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP matrix representing the cost of allocation options. The
+ // rows and columns correspond to the allocation options for the two live
+ // intervals. Elements will be infinite where corresponding registers alias,
+ // since we cannot allocate aliasing registers to interfering live intervals.
+ // All other elements (non-aliasing combinations) will have zero cost. Note
+ // that the spill option (element 0,0) has zero cost, since we can allocate
+ // both intervals to memory safely (the cost for each individual allocation
+ // to memory is accounted for by the cost vectors for each live interval).
+ PBQP::Matrix *m =
+ new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
+
+ // Assume this is a zero matrix until proven otherwise. Zero matrices occur
+ // between interfering live ranges with non-overlapping register sets (e.g.
+ // non-overlapping reg classes, or disjoint sets of allowed regs within the
+ // same class). The term "overlapping" is used advisedly: sets which do not
+ // intersect, but contain registers which alias, will have non-zero matrices.
+ // We optimize zero matrices away to improve solver speed.
+ bool isZeroMatrix = true;
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over allowed sets, insert infinities where required.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+ unsigned reg2 = *a2Itr;
+
+ // If the row/column regs are identical or alias insert an infinity.
+ if (tri->regsOverlap(reg1, reg2)) {
+ (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+ // free it and return null.
+ delete m;
+ return 0;
+ }
+
+ // ...otherwise return the cost matrix.
+ return m;
+}
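A small worked example (register names hypothetical): with allowed1 = {R0, R1}, allowed2 = {R1, R2}, and no aliasing between R0/R1/R2, the resulting 3x3 matrix is

           spill   R1    R2
  spill      0      0     0
  R0         0      0     0
  R1         0     inf    0

Only the (R1, R1) entry is infinite, since assigning both interfering intervals to R1 is the only overlapping choice; because the matrix is non-zero it is kept rather than optimized away.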
+
+template <typename RegContainer>
+PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2,
+ PBQP::PBQPNum cBenefit) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP Matrix representing the benefits of coalescing. As with
+ // interference matrices the rows and columns represent allowed registers
+ // for the LiveIntervals which are (potentially) to be coalesced. The amount
+ // -cBenefit will be placed in any element representing the same register
+ // for both intervals.
+ PBQP::Matrix *m =
+ new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
+
+ // Reset costs to zero.
+ m->reset(0);
+
+ // Assume the matrix is zero till proven otherwise. Zero matrices will be
+ // optimized away as in the interference case.
+ bool isZeroMatrix = true;
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over the allowed sets, insert coalescing benefits where
+ // appropriate.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+ // If the row and column represent the same register insert a beneficial
+ // cost to preference this allocation - it would allow us to eliminate a
+ // move instruction.
+ if (reg1 == *a2Itr) {
+ (*m)[ri][ci] = -cBenefit;
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+ // ...free it and return null.
+ delete m;
+ return 0;
+ }
+
+ return m;
+}
+
+PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
+
+ typedef MachineFunction::const_iterator MFIterator;
+ typedef MachineBasicBlock::const_iterator MBBIterator;
+ typedef LiveInterval::const_vni_iterator VNIIterator;
+
+ CoalesceMap coalescesFound;
+
+ // To find coalesces we need to iterate over the function looking for
+ // copy instructions.
+ for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+
+ const MachineBasicBlock *mbb = &*bbItr;
+
+ for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+
+ const MachineInstr *instr = &*iItr;
+ unsigned srcReg, dstReg, srcSubReg, dstSubReg;
+
+ // If this isn't a copy then continue to the next instruction.
+ if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+ continue;
+
+ // If the registers are already the same our job is nice and easy.
+ if (dstReg == srcReg)
+ continue;
+
+ bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
+ dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+
+ // If both registers are physical then we can't coalesce.
+ if (srcRegIsPhysical && dstRegIsPhysical)
+ continue;
+
+ // If it's a copy that includes a virtual register but the source and
+ // destination classes differ then we can't coalesce, so continue with
+ // the next instruction.
+ const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
+
+ const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
+
+ if (srcRegClass != dstRegClass)
+ continue;
+
+ // We also need any physical regs to be allocable; coalescing with
+ // a non-allocable register is invalid.
+ if (srcRegIsPhysical) {
+ if (std::find(dstRegClass->allocation_order_begin(*mf),
+ dstRegClass->allocation_order_end(*mf), srcReg) ==
+ dstRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ if (dstRegIsPhysical) {
+ if (std::find(srcRegClass->allocation_order_begin(*mf),
+ srcRegClass->allocation_order_end(*mf), dstReg) ==
+ srcRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ // If we've made it here we have a copy with compatible register classes.
+ // We can probably coalesce, but we need to consider overlap.
+ const LiveInterval *srcLI = &lis->getInterval(srcReg),
+ *dstLI = &lis->getInterval(dstReg);
+
+ if (srcLI->overlaps(*dstLI)) {
+ // Even in the case of an overlap we might still be able to coalesce,
+ // but we need to make sure that no definition of either range occurs
+ // while the other range is live.
+
+ // Otherwise start by assuming we're ok.
+ bool badDef = false;
+
+ // Test all defs of the source range.
+ for (VNIIterator
+ vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // If we find a poorly defined def we err on the side of caution.
+ if (!(*vniItr)->def.isValid()) {
+ badDef = true;
+ break;
+ }
+
+ // If we find a def that kills the coalescing opportunity then
+ // record it and break from the loop.
+ if (dstLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+ // If we have a bad def give up, continue to the next instruction.
+ if (badDef)
+ continue;
+
+ // Otherwise test definitions of the destination range.
+ for (VNIIterator
+ vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // We want to make sure we skip the copy instruction itself.
+ if ((*vniItr)->getCopy() == instr)
+ continue;
+
+ if (!(*vniItr)->def.isValid()) {
+ badDef = true;
+ break;
+ }
+
+ if (srcLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+ // As before, if we find a bad def we give up and continue to the
+ // next instruction.
+ if (badDef)
+ continue;
+ }
+
+ // If we make it to here then either the ranges didn't overlap, or they
+ // did, but none of their definitions would prevent us from coalescing.
+ // We're good to go with the coalesce.
+
+ float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+
+ coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
+ coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
+ }
+
+ }
+
+ return coalescesFound;
+}
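For a sense of scale, the benefit formula powf(10.0f, depth) / 5.0 yields 0.2 at loop depth 0, 2.0 at depth 1, and 20.0 at depth 2, so copies in inner loops are far more strongly preferred for coalescing.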
+
+void PBQPRegAlloc::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregIntervalsToAlloc.insert(li);
+ }
+ else {
+ emptyVRegIntervals.insert(li);
+ }
+ }
+}
+
+PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+ typedef std::vector<unsigned> RegVector;
+
+ // This will store the physical intervals for easy reference.
+ LIVector physIntervals;
+
+ // Start by clearing the old node <-> live interval mappings & allowed sets
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+
+ // Populate physIntervals, update preg use:
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ physIntervals.push_back(itr->second);
+ mri->setPhysRegUsed(itr->second->reg);
+ }
+ }
+
+ // Iterate over vreg intervals, construct live interval <-> node number
+ // mappings.
+ for (LiveIntervalSet::const_iterator
+ itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
+ itr != end; ++itr) {
+ const LiveInterval *li = *itr;
+
+ li2Node[li] = node2LI.size();
+ node2LI.push_back(li);
+ }
+
+ // Get the set of potential coalesces.
+ CoalesceMap coalesces;
+
+ if (pbqpCoalescing) {
+ coalesces = findCoalesces();
+ }
+
+ // Construct a PBQP solver for this problem
+ PBQP::Graph problem;
+ problemNodes.resize(vregIntervalsToAlloc.size());
+
+ // Resize allowedSets container appropriately.
+ allowedSets.resize(vregIntervalsToAlloc.size());
+
+ // Iterate over virtual register intervals to compute allowed sets...
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+
+ // Grab pointers to the interval and its register class.
+ const LiveInterval *li = node2LI[node];
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+
+ // Start by assuming all allocable registers in the class are allowed...
+ RegVector liAllowed(liRC->allocation_order_begin(*mf),
+ liRC->allocation_order_end(*mf));
+
+ // Eliminate the physical registers which overlap with this range, along
+ // with all their aliases.
+ for (LIVector::iterator pItr = physIntervals.begin(),
+ pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+
+ if (!li->overlaps(**pItr))
+ continue;
+
+ unsigned pReg = (*pItr)->reg;
+
+ // If we get here then the live intervals overlap, but we're still ok
+ // if they're coalescable.
+ if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
+ continue;
+
+ // If we get here then we have a genuine exclusion.
+
+ // Remove the overlapping reg...
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), pReg);
+
+ if (eraseItr != liAllowed.end())
+ liAllowed.erase(eraseItr);
+
+ const unsigned *aliasItr = tri->getAliasSet(pReg);
+
+ if (aliasItr != 0) {
+ // ...and its aliases.
+ for (; *aliasItr != 0; ++aliasItr) {
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+
+ if (eraseItr != liAllowed.end()) {
+ liAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Copy the allowed set into a member vector for use when constructing cost
+ // vectors & matrices, and mapping PBQP solutions back to assignments.
+ allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+
+ // Set the spill cost to the interval weight, or epsilon if the
+ // interval weight is zero
+ PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
+ li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+ // Build a cost vector for this interval.
+ problemNodes[node] =
+ problem.addNode(
+ buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
+
+ }
+
+ // Now add the cost matrices...
+ for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
+ const LiveInterval *li = node2LI[node1];
+
+ // Test for live range overlaps and insert interference matrices.
+ for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
+ const LiveInterval *li2 = node2LI[node2];
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(li->reg, li2->reg));
+
+ PBQP::Matrix *m = 0;
+
+ if (cmItr != coalesces.end()) {
+ m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
+ cmItr->second);
+ }
+ else if (li->overlaps(*li2)) {
+ m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
+ }
+
+ if (m != 0) {
+ problem.addEdge(problemNodes[node1],
+ problemNodes[node2],
+ *m);
+
+ delete m;
+ }
+ }
+ }
+
+ assert(problem.getNumNodes() == allowedSets.size());
+/*
+ std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
+ << problem.getNumEdges() << " edges.\n";
+
+ problem.printDot(std::cerr);
+*/
+ // We're done, PBQP problem constructed - return it.
+ return problem;
+}
+
+void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0)
+ vni = stackInterval.getValNumInfo(0);
+ else
+ vni = stackInterval.getNextValue(
+ SlotIndex(), 0, false, lss->getVNInfoAllocator());
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
+
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ // Iterate over the nodes mapping the PBQP solution to a register assignment.
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+ unsigned virtReg = node2LI[node]->reg,
+ allocSelection = solution.getSelection(problemNodes[node]);
+
+ // If the PBQP solution is non-zero it's a physical register...
+ if (allocSelection != 0) {
+ // Get the physical reg, subtracting 1 to account for the spill option.
+ unsigned physReg = allowedSets[node][allocSelection - 1];
+
+ DEBUG(dbgs() << "VREG " << virtReg << " -> "
+ << tri->getName(physReg) << "\n");
+
+ assert(physReg != 0);
+
+ // Add to the virt reg map and update the used phys regs.
+ vrm->assignVirt2Phys(virtReg, physReg);
+ }
+ // ...Otherwise it's a spill.
+ else {
+
+ // Make sure we ignore this virtual reg on the next round
+ // of allocation
+ vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
+
+ // Insert spill ranges for this live range
+ const LiveInterval *spillInterval = node2LI[node];
+ double oldSpillWeight = spillInterval->weight;
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+
+ (void) oldSpillWeight;
+ DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
+ << oldSpillWeight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+
+ assert(!(*itr)->empty() && "Empty spill range.");
+
+ DEBUG(dbgs() << (*itr)->reg << " ");
+
+ vregIntervalsToAlloc.insert(*itr);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
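To illustrate the decoding above (allowed set hypothetical): if allowedSets[node] = {R3, R5, R7}, then selection 0 means the interval is spilled, selection 1 assigns R3, and selection 3 assigns R7; the -1 accounts for the spill option occupying index 0 of the node's cost vector.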
+
+void PBQPRegAlloc::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (LiveIntervalSet::const_iterator
+ itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+ itr != end; ++itr) {
+ LiveInterval *li = *itr;
+
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = *liRC->allocation_order_begin(*mf);
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ }
+ else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ }
+ else {
+ // Ranges which are assigned a stack slot only are ignored.
+ continue;
+ }
+
+ if (reg == 0) {
+ // Filter out zero regs - they're for intervals that were spilled.
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is repeated until no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there aren't any then we're done here.
+ if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
+ return true;
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregIntervalsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ PBQP::Graph problem = constructPBQPProblem();
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem);
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(solution);
+
+ ++round;
+ }
+ }
+
+ // Finalize allocation, allocate empty ranges.
+ finalizeAlloc();
+
+ vregIntervalsToAlloc.clear();
+ emptyVRegIntervals.clear();
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+ problemNodes.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator() {
+ return new PBQPRegAlloc();
+}
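Given the RegisterRegAlloc registration above under the name "pbqp", the allocator can be selected on the llc command line, e.g.:

  llc -regalloc=pbqp -o out.s in.bc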
+
+
+#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 0000000..1131e3d
--- /dev/null
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,41 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface used by all clients and
+// implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+// Register the RegisterCoalescer interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+char RegisterCoalescer::ID = 0;
+
+// RegisterCoalescer destructor: DO NOT move this to the header file
+// for RegisterCoalescer or else clients of the RegisterCoalescer
+// class may not depend on the RegisterCoalescer.o file in the current
+// .a file, causing register coalescer support to not be included in
+// the tool correctly!
+//
+RegisterCoalescer::~RegisterCoalescer() {}
+
+// Because of the way .a files work, we must force the SimpleRC
+// implementation to be pulled in if the RegisterCoalescer classes are
+// pulled in. Otherwise we run the risk of RegisterCoalescer being
+// used, but the default implementation not being linked into the tool
+// that uses it.
+DEFINING_FILE_FOR(RegisterCoalescer)
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..67bf209
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,360 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg) {
+ RegsAvailable.reset(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ RegsAvailable.reset(SubReg);
+}
+
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ if (isUsed(Reg))
+ return true;
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ if (isUsed(*R))
+ return true;
+ return false;
+}
+
+void RegScavenger::initRegState() {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+
+ // All registers start out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I > 0; I = PR.find_next(I))
+ setUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ // Self-initialize.
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+
+ MachineInstr *MI = MBBI;
+
+ if (MI == ScavengeRestore) {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ }
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ BitVector EarlyClobberRegs(NumPhysRegs);
+ BitVector KillRegs(NumPhysRegs);
+ BitVector DefRegs(NumPhysRegs);
+ BitVector DeadRegs(NumPhysRegs);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Two-address operands implicitly kill.
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (MO.isDead())
+ addRegWithSubRegs(DeadRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ if (MO.isEarlyClobber())
+ addRegWithAliases(EarlyClobberRegs, Reg);
+ }
+ }
+
+ // Verify uses and defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (!isUsed(Reg)) {
+ // Check if it's partial live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is that the insert_subreg could be eliminated. The
+ // use of D0 then reads a partially undef value. This is not
+ // *incorrect* since S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (isUsed(SubReg)) {
+ SubUsed = true;
+ break;
+ }
+ assert(SubUsed && "Using an undefined register!");
+ }
+ assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
+ "Using an early clobbered register!");
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUnused(DeadRegs);
+ setUsed(DefRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+ ++I)
+ Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ return *I;
+ return 0;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after MBBI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ Candidates.reset(MO.getReg());
+ for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ Candidates.reset(*R);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector Candidates(NumPhysRegs, false);
+ CreateRegClassMask(RC, Candidates);
+ // Do not include reserved registers.
+ Candidates ^= ReservedRegs & Candidates;
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it. We have
+ // probably found a callee-saved register that has been saved in the
+ // prologue, but happens to be unused at this point.
+ if (!isAliasUsed(SReg))
+ return SReg;
+
+ assert(ScavengedReg == 0 &&
+ "Scavenger slot is live, unable to scavenge another register!");
+
+ // Avoid infinite regress
+ ScavengedReg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC);
+ II = prior(UseMI);
+ TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ }
+
+ ScavengeRestore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ return SReg;
+}
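+
+// Illustrative usage sketch (not part of this change): a frame-lowering
+// client keeps the scavenger's liveness tracking in sync as it walks a
+// block, then scavenges a register at the point it needs one. The driver
+// loop and needsTempReg() are assumptions for illustration; enterBasicBlock()
+// and forward() are the scavenger's tracking interface.
+//
+//   RegScavenger RS;
+//   RS.enterBasicBlock(MBB);
+//   for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+//     RS.forward(I);
+//     if (needsTempReg(I)) {  // hypothetical predicate
+//       unsigned Reg = RS.scavengeRegister(RC, I, /*SPAdj=*/0);
+//       // ... rewrite the instruction to use Reg ...
+//     }
+//   }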
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..1f3e295
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,589 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ TLI(TM.getTargetLowering()),
+ MF(mf), MRI(mf.getRegInfo()),
+ ConstPool(MF.getConstantPool()),
+ EntrySU(), ExitSU() {
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ BB = bb;
+ InsertPos = insertPos;
+
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+
+ Schedule();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+void SUnit::addPred(const SDep &D) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D)
+ return;
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+}
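+
+// Example (illustrative): B->addPred(SDep(A, SDep::Data, 2)) records A as a
+// data predecessor of B with latency 2. It bumps B's NumPreds and A's
+// NumSuccs (plus the corresponding Left counters while the other end is
+// unscheduled), and, because the latency is nonzero, marks B's depth and
+// A's height as no longer current so they are recomputed lazily.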
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ bool FoundSucc = false;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+ EE = N->Succs.end(); II != EE; ++II)
+ if (*II == P) {
+ FoundSucc = true;
+ N->Succs.erase(II);
+ break;
+ }
+ assert(FoundSucc && "Mismatching preds / succs lists!");
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
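+
+// Worked example (illustrative): for a three-node chain A -> B -> C where
+// both edges have latency 1, the worklist computations above yield
+//   depth:  A = 0, B = 1, C = 2   (max latency-weighted path from a root)
+//   height: A = 2, B = 1, C = 0   (max latency-weighted path to a leaf)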
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << Depth << "\n";
+ dbgs() << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks the nodes
+/// reachable from Y by calling DFS, and then uses Shift to move them so
+/// that they lie immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (!--Node2Index[PredSU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
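+
+// Worked example (illustrative): for the diamond DAG with edges A->B, A->C,
+// B->D and C->D, only D has no successors, so it receives the highest index
+// first. One valid run of the loop above produces
+//   Node2Index: A = 0, B = 1, C = 2, D = 3
+// which satisfies ord(X) < ord(Y) for every edge X->Y.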
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate the
+/// removal of an edge from the specified node N (a predecessor) to the
+/// current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // Removing an edge cannot invalidate an existing topological order,
+ // so there is nothing that must be recomputed here.
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool& HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ int s = SU->Succs[I].getSUnit()->NodeNum;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
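+
+// Worked example (illustrative): suppose LowerBound = 2, UpperBound = 5,
+// Index2Node[2..5] = [w, x, y, z], and the DFS visited {x, z}. The first
+// loop packs the unvisited nodes low (w -> 2, y -> 3) while collecting
+// L = [x, z]; the second loop then reassigns the visited nodes to the top
+// of the window (x -> 4, z -> 5), so everything reachable from Y ends up
+// immediately after X.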
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ if (IsReachable(TargetSU, SU))
+ return true;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(TargetSU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
+ std::vector<SUnit> &sunits)
+ : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 0000000..8e03420
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,71 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+ DenseMap<SUnit*, unsigned> &VRBaseMap) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
+ SU->CopyDstRC, SU->CopySrcRC);
+ (void)Success;
+ assert(Success && "copyRegToReg failed!");
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
+ SU->CopyDstRC, SU->CopySrcRC);
+ (void)Success;
+ assert(Success && "copyRegToReg failed!");
+ }
+ break;
+ }
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 0000000..56dd533
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,576 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
+ MFI = mf.getFrameInfo();
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ Begin = begin;
+ InsertPosIndex = endcount;
+
+ ScheduleDAG::Run(bb, end);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObject.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant or a multiplied value, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+ // First just call Value::getUnderlyingObject to let it do what it does.
+ do {
+ V = V->getUnderlyingObject();
+ // If it found an inttoptr, use special code to continue climbing.
+ if (Operator::getOpcode(V) != Instruction::IntToPtr)
+ break;
+ const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ // If that succeeded in finding a pointer, continue the search.
+ if (!isa<PointerType>(O->getType()))
+ break;
+ V = O;
+ } while (1);
+ return V;
+}
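+
+// The kind of IR pattern being looked through (illustrative):
+//
+//   %i = ptrtoint i8* %p to i64
+//   %j = add i64 %i, 16
+//   %q = inttoptr i64 %j to i8*
+//
+// getUnderlyingObject(%q) climbs through the inttoptr, the add of a
+// constant, and the ptrtoint, ultimately reporting %p's underlying object.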
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ bool &MayAlias) {
+ MayAlias = true;
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return 0;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return 0;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
+ return V;
+ }
+
+ if (isIdentifiedObject(V))
+ return V;
+
+ return 0;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+ // We'll be allocating one SUnit for each instruction, plus one for
+ // the region exit node.
+ SUnits.reserve(BB->size());
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
+
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+
+ // Walk the list of instructions, from bottom moving up.
+ for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ const TargetInstrDesc &TID = MI->getDesc();
+ assert(!TID.isTerminator() && !MI->isLabel() &&
+ "Cannot schedule terminators or labels!");
+ // Create the SUnit for this MI.
+ SUnit *SU = NewSUnit(MI);
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
+
+ // Add register-based dependencies (data, anti, and output).
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ std::vector<SUnit *> &UseList = Uses[Reg];
+ std::vector<SUnit *> &DefList = Defs[Reg];
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
+ }
+ }
+
+ if (MO.isDef()) {
+ // Add any data dependencies.
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU) {
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about
+ // it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+ UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ SDep dep(SU, SDep::Data, LDataLatency, Reg);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, dep);
+ ST.adjustSchedDependency(SU, UseSU, dep);
+ }
+ UseSU->addPred(dep);
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU) {
+ SDep dep(SU, SDep::Data, DataLatency, *Alias);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, dep);
+ ST.adjustSchedDependency(SU, UseSU, dep);
+ }
+ UseSU->addPred(dep);
+ }
+ }
+ }
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseTID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ UseList.clear();
+ if (!MO.isDead())
+ DefList.clear();
+ DefList.push_back(SU);
+ } else {
+ UseList.push_back(SU);
+ }
+ }
+
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass)
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+#define STORE_LOAD_LATENCY 1
+ unsigned TrueMemOrderLatency = 0;
+ if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references though SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (TID.mayStore()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = STORE_LOAD_LATENCY;
+ if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ I->second = SU;
+ } else {
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
+ /*Reg=*/0, /*isNormalMemory=*/true));
+ J->second.clear();
+ }
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ } else {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+ } else if (TID.mayLoad()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = 0;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ }
+ }
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ Defs[i].clear();
+ Uses[i].clear();
+ }
+ PendingLoads.clear();
+}
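+
+// Example of the register dependence kinds built above (illustrative):
+// given the sequence
+//   r1 = add r2, r3
+//   r2 = sub r4, r5
+// the sub's redefinition of r2 gets an anti edge (latency 0) on the add's
+// use of r2, so both may issue in the same cycle, while a later
+// redefinition of r1 would get an output edge with latency 1.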
+
+void ScheduleDAGInstrs::FinishBlock() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node.
+ SU->Latency =
+ InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
+ if (InstrItins.isEmpty())
+ if (SU->getInstr()->getDesc().mayLoad())
+ SU->Latency += 2;
+}
+
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty())
+ return;
+
+ // For a data dependency with a known register...
+ if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+ return;
+
+ const unsigned Reg = dep.getReg();
+
+ // ... find the definition of the register in the defining
+ // instruction
+ MachineInstr *DefMI = Def->getInstr();
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ int DefCycle =
+ InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+ if (DefCycle >= 0) {
+ MachineInstr *UseMI = Use->getInstr();
+ const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+ if (UseCycle >= 0)
+ Latency = std::max(Latency, DefCycle - UseCycle + 1);
+ }
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+ }
+}
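+
+// Example (illustrative): if the itinerary says the defined operand is
+// produced at cycle 3 of the defining instruction (DefCycle = 3) and read
+// at cycle 1 of the using instruction (UseCycle = 1), the edge latency
+// becomes DefCycle - UseCycle + 1 = 3.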
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss);
+ return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::
+EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ // For MachineInstr-based scheduling, we're rescheduling the instructions in
+ // the block, so start by removing them from the block.
+ while (Begin != InsertPos) {
+ MachineBasicBlock::iterator I = Begin;
+ ++Begin;
+ BB->remove(I);
+ }
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ BB->insert(InsertPos, SU->getInstr());
+ }
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (!Sequence.empty())
+ Begin = Sequence[0]->getInstr();
+
+ return BB;
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 0000000..366c3a8
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,192 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
+ class VISIBILITY_HIDDEN LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I, ++Count) {
+ const MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+ const MachineFrameInfo *MFI;
+
+ /// Defs, Uses - Remember where defs and uses of each physical register
+ /// are as we iterate upward through the instructions. This is allocated
+ /// here instead of inside BuildSchedGraph to avoid the need for it to be
+ /// initialized and destructed for each block.
+ std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+ /// back-edge-aware scheduling.
+ ///
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+
+ public:
+ MachineBasicBlock::iterator Begin; // The beginning of the range to
+ // be scheduled. The range extends
+ // to InsertPos.
+ unsigned InsertPosIndex; // The index in BB of InsertPos.
+
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Run - perform scheduling.
+ ///
+ void Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we
+ /// are given as input.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const;
+
+ virtual MachineBasicBlock*
+ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*);
+
+ /// StartBlock - Prepare to perform scheduling in the given block.
+ ///
+ virtual void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// FinishBlock - Clean up after scheduling in the given block.
+ ///
+ virtual void FinishBlock();
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..027f615
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,98 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
+ ":" + BB->getBasicBlock()->getNameStr());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
new file mode 100644
index 0000000..80c7d7c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_llvm_library(LLVMSelectionDAG
+ CallingConvLower.cpp
+ DAGCombiner.cpp
+ FastISel.cpp
+ FunctionLoweringInfo.cpp
+ InstrEmitter.cpp
+ LegalizeDAG.cpp
+ LegalizeFloatTypes.cpp
+ LegalizeIntegerTypes.cpp
+ LegalizeTypes.cpp
+ LegalizeTypesGeneric.cpp
+ LegalizeVectorOps.cpp
+ LegalizeVectorTypes.cpp
+ ScheduleDAGFast.cpp
+ ScheduleDAGList.cpp
+ ScheduleDAGRRList.cpp
+ ScheduleDAGSDNodes.cpp
+ SelectionDAG.cpp
+ SelectionDAGBuilder.cpp
+ SelectionDAGISel.cpp
+ SelectionDAGPrinter.cpp
+ TargetLowering.cpp
+ )
+
+target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..4e6c1fc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,179 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
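+
+// Example (illustrative): a byval argument with Size = 12 and Align = 4,
+// passed with MinSize = 16 and MinAlign = 8, is clamped up to a 16-byte
+// slot at 8-byte alignment before AllocateStack reserves its offset.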
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
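+
+// Example (illustrative): for Reg = 37, the mark lands in word
+// UsedRegs[37/32] = UsedRegs[1] at bit (37 & 31) = 5; each alias of the
+// register is marked the same way.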
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
+ EVT VT = OutTys[i];
+ ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].Val.getValueType();
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].Val.getValueType();
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..9189e71
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,6454 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// AddToWorkList - Add to the work list, making sure its instance is at
+ /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const;
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node.)
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ EVT getShiftAmountTy() {
+ return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+ }
+
+public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D),
+ TLI(D.getTargetLoweringInfo()),
+ Level(Unrestricted),
+ OptLevel(OL),
+ LegalOperations(false),
+ LegalTypes(false),
+ AA(A) {}
+
+ /// Run - Runs the dag combiner on all nodes in the work list.
+ void Run(CombineLevel AtLevel);
+ };
+}
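+
+// A minimal usage sketch (hypothetical caller, mirroring how
+// SelectionDAG::Combine drives this class):
+//
+//   DAGCombiner(DAG, AA, OptLevel).Run(AtLevel);
+//
+// where AtLevel selects the pre- or post-legalization rule set.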
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ unsigned Depth = 0) {
+ // No compile time optimizations on this type.
+ if (Op.getValueType() == MVT::ppcf128)
+ return 0;
+
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ }
+}
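+
+// Worked example (illustrative): with unsafe FP math enabled, for
+// Op = (fadd (fneg X), Y) the recursive query on operand 0 sees an FNEG and
+// returns 2, so the whole fadd is negatible for free; GetNegatedExpression
+// below then produces (fsub X, Y), since -((-X) + Y) == X - Y.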
+
+/// GetNegatedExpression - If isNegatibleForFree returns nonzero, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
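+
+// Illustrative example: (select_cc a, b, 1, 0, cc) produces 1 or 0 exactly as
+// (setcc a, b, cc) would, so callers can treat both forms uniformly through
+// the LHS/RHS/CC out-parameters.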
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ } else if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ } else if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
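+
+// Illustrative examples: with Opc == ISD::ADD, (add (add x, 3), 5) folds to
+// (add x, 8); and when the inner node has one use, (add (add x, 3), y)
+// rewrites to (add (add x, y), 3), keeping the constant exposed on the RHS.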
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void
+DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
+ TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
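+
+// Example (illustrative): if a user demands only the low 8 bits of
+// (and x, 0xFFFF), the mask covers every demanded bit, so the
+// target-independent logic can replace the AND with x outright; the
+// replacement is then committed via CommitTargetLoweringOpt above.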
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // Set the instance variables so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= NoIllegalOperations;
+ LegalTypes = Level >= NoIllegalTypes;
+
+ // Add all the dag nodes to the worklist.
+ WorkList.reserve(DAG.allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted, and tracks any changes
+ // of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may point at deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // While the worklist isn't empty, inspect the node at the end of it and
+ // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+ // Add any uses of the old node to the worklist in case this node is the
+ // last one that uses them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null SDValue.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs list grows as new token factors
+ // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall through.
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ } while (!N->use_empty());
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
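+
+// Worked example (illustrative): with c1 == 1 and c2 == 3,
+// (add (shl (add x, 1), 3), y) becomes (add (add (shl x, 3), 8), y): the +1
+// is scaled by the shift (1 << 3 == 8) and hoisted out of it.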
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
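+ // Illustrative example: (add (and x, 0xF0), (and y, 0x0F)) has disjoint
+ // possibly-set bits, so it is rewritten as
+ // (or (and x, 0xF0), (and y, 0x0F)).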
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, N->getValueType(0));
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val, getShiftAmountTy())));
+ }
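+ // Illustrative examples for the two folds above: (mul x, 8) becomes
+ // (shl x, 3), and (mul x, -8) becomes (sub 0, (shl x, 3)).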
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSExtValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSExtValue()) ||
+ isPowerOf2_64(-N1C->getSExtValue()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ int64_t pow2 = N1C->getSExtValue();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
+
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy()));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy()));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
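+ // Worked example for the sequence above (illustrative, i32): for X /s 4,
+ // lg2 == 2, SGN = (sra X, 31) is -1 for negative X and 0 otherwise, so
+ // SRL = (srl SGN, 30) is 3 or 0, ADD biases negative values by 3, and
+ // (sra ADD, 2) then rounds toward zero as sdiv requires.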
+
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
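+ // Illustrative example: (urem x, 8) becomes (and x, 7).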
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy()));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Return the simplified value, or a null SDValue
+/// if no simplification was made.
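+/// For example, if only the low result of an (smul_lohi x, y) node is used,
+/// it can be replaced by a plain (mul x, y).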
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+  // If both halves are used, return the node as is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
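+  // Performing the logic op at the source type leaves a single extend,
+  // e.g. (or (zext x), (zext y)) -> (zext (or x, y)) needs one zext, not two.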
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
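+  // For example, (and (any_ext i8 x to i32), 255) -> (zext i8 x to i32),
+  // since the mask keeps exactly the low 8 bits.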
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+    // Canonicalize to LL == RL by swapping the second compare's operands.
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
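+    // For example, (and (extload i8 x to i32), 255) -> (zextload i8 x to i32).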
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
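+          // (A non-round type such as i24, for instance, would need to be
+          // assembled from multiple memory operations.)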
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
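+            // For example, narrowing an i32 load to i8 on a big-endian
+            // target reads the byte at offset 4 - 1 = 3.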
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+    // Canonicalize to LL == RL by swapping the second compare's operands.
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
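+    // For example, (or (and X, 0xFF00), (and Y, 0x00FF)) becomes
+    // (and (or X, Y), 0xFFFF) when X's low byte and Y's high byte are known
+    // to be zero.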
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
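+// For example, on i32 (or (shl x, 8), (srl x, 24)) becomes (rotl x, 8),
+// since the shift amounts sum to the operand size (8 + 24 == 32).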
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+  // Must be a legal type; expanded and promoted types won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
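+  // For example, (shl (add x, c1), c2) becomes (add (shl x, c2), c1 << c2).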
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+  // logical operation, do not perform the transformation. The HighBitSet
+  // boolean indicates the required value of the high bit of the binop's
+  // constant for the transformation to be safe.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits) // shifting out all of the bits yields zero
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+ // (srl (and x, (shl -1, c1)), (sub c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1),
+ VT);
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+ N0.getOperand(0),
+ HiBitsMask);
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (sra c1, c2) -> (c1 >>s c2)
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+  // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg when the target supports
+  // sext_inreg of the resulting type.
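+  // For example, on i32 (sra (shl x, 24), 24) becomes
+  // (sign_extend_inreg x, i8).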
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+  // result_size - n > m.
+  // If truncate is free for the target, the sign_extend of the narrower
+  // value is likely to result in better code.
+ if (N0.getOpcode() == ISD::SHL) {
+    // Get the two shift constants: N01C = m and N1C = result_size - n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncate-to type is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits) // shifting out all of the bits yields zero
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+  // fold (srl (ctlz x), log2(size(x))) -> (xor x, 1) iff the only bit that
+  // could be set in x is the low bit.
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
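+      // For example, if x is known to be either 0 or 0x10 (i32), then
+      // (srl (ctlz x), 5) becomes (xor (srl x, 4), 1).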
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy()));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+  // However, once the source operand of the SRL is optimized into an AND, the
+  // SRL itself may not be optimized further. Look for the BRCOND user and add
+  // it to the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extension is possible and the above-
+// mentioned transformation is profitable.
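+// For example, when the load also feeds a (setcc (load x), c) user, the
+// setcc operands can be extended along with the load, avoiding truncates of
+// the extended value.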
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
+      return !ExtendNodes.empty();
+ }
+ return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already sign extended and can be used directly.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ if (VT.isVector() &&
+        // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
+
+ // Only do this before legalize for now.
+ !LegalOperations) {
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
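+  // e.g. with x : i32, (zext i32 (trunc i8 x)) becomes (and x, 255); the
+  // trunc/zext pair collapses to a single mask when the casts are not both
+  // free.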
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
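+  // e.g. with x : i64, (zext i64 (and (trunc i8 x), 15)) becomes
+  // (and x, 15), applying the mask directly in the wider type.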
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
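+      // e.g. if x : i8 was zero-extended to i16, the top 8 bits are known
+      // zero, so a shift amount of at most 8 cannot lose set bits; anything
+      // larger must be rejected.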
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+ N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+ if (ShAmt > KnownZeroBits)
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ return DAG.getNode(N0.getOpcode(), dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+ DAG.getNode(ISD::ZERO_EXTEND, dl,
+ N0.getOperand(1).getValueType(),
+ N0.getOperand(1)));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
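+/// For example, with Mask = 0xff, (or x, (shl y, 8)) simplifies to x, since
+/// the shifted operand contributes none of the demanded low bits.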
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of
+/// the number of bits in the narrower type, transform it into a narrower
+/// load from address + N / (number of bits in the new type). If the result
+/// is to be extended, also fold the extension to form an extending load.
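+/// For example, (truncate i8 (srl (load i32 p), 16)) can become an i8 load
+/// from p+2 on a little-endian target (p+1 on a big-endian one).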
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+  // extending back to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
+ return SDValue();
+ }
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+ }
+ }
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
+ cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load = (ExtType == ISD::NON_EXTLOAD)
+ ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ LN0->isVolatile(), NewAlign)
+ : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ ExtVT, LN0->isVolatile(), NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Return the new loaded value.
+ return Load;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
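+  // (Only bit EVTBits-1, the sign bit of the narrow type, needs to be known
+  // zero for this to be safe.)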
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
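+  // e.g. (sext_in_reg (srl X:i32, 23), i8) -> (sra X, 23) is safe when X has
+  // at least two sign bits, since bit 30, which the sext_in_reg replicates,
+  // then equals bit 31, which the sra replicates.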
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used. For example "trunc (or (shl x, 8), y)"
+ // -> trunc y
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ return ReduceLoadWidth(N);
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
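+/// For example, on a little-endian target, (build_pair (load i32 p),
+/// (load i32 p+4)) can become a single i64 load from p, provided the wider
+/// load's ABI alignment is already satisfied.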
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+  assert(N->getOpcode() == ISD::BUILD_PAIR && "Not a BUILD_PAIR!");
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but be conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getSrcValue(),
+ LD1->getSrcValueOffset(), false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BIT_CONVERT)
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
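+  // e.g. for f32, (bitconvert (fneg x)) becomes (xor (bitconvert x),
+  // 0x80000000), flipping just the IEEE sign bit.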
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+ SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
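+/// For example, bitconverting a <2 x i32> of constants to <8 x i8> splits
+/// each 32-bit constant into four byte constants (in endian order), while
+/// the reverse direction packs neighboring elements into wider constants.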
+SDValue DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is a FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+  // Okay, we know the src/dst types are both integers of differing sizes.
+  // Handle the growing case first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ zextOrTrunc(SrcBitSize).zext(DstBitSize));
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+  // fold (fadd c1, c2) -> c1+c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if (isNegatibleForFree(N1, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (isNegatibleForFree(N0, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fpextend), don't fold it; allow ourselves to be
+  // folded by the fp_round combine instead.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect
+  // the value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (isNegatibleForFree(N0, LegalOperations))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ VT, Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
+ // Replace the uses of SRL with SETCC
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
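+///
+/// Illustrative sketch (not tied to a particular target): given
+///   ptr2 = add ptr, 4
+///   val  = load ptr2
+/// where ptr2 has other uses, a target with pre-increment addressing can
+/// fold the add into the load, yielding roughly
+///   val, newptr = pre_inc_load ptr, 4
+/// with the remaining uses of the add redirected to newptr.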
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+ // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of the old base ptr is a predecessor of N. If the ptr
+ // were folded, that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (Use->isPredecessorOf(N))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all other uses of the add / subtract are
+/// redirected to the new load / store.
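+///
+/// Illustrative sketch (not tied to a particular target): given
+///   val  = load ptr
+///   ptr2 = add ptr, 4
+/// a target with post-increment addressing can fold the two into roughly
+///   val, newptr = post_inc_load ptr, 4
+/// with all uses of the add redirected to newptr.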
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
+ std::swap(BasePtr, Offset);
+ if (Ptr != BasePtr)
+ continue;
+ // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+ (UseUse->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), Align);
+ }
+ }
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to avoid the dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
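+///
+/// Worked example (assuming a little-endian target where the narrow i8
+/// operation is legal and narrowing is deemed profitable): in
+///   store (or (load i32 p), 0x00FF0000), p
+/// only bits [16,24) can change, so the sequence can be narrowed to a
+/// one-byte load / or / store at address p+2 with immediate 0xFF.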
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr)
+ return SDValue();
+
+ // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the lsb of the changed bits does not fall on a NewBW boundary,
+ // start at the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ if (NewAlign <
+ TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), Align);
+ }
+ }
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ EVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+ }
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
+ case MVT::f80: // We don't do this for these yet.
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ } else if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(),
+ isVolatile, ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ SVOffset += 4;
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(Value.getValueSizeInBits(),
+ ST->getMemoryVT().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
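+ // For example, inserting x at constant index 2 of (build_vector a, b, c, d)
+ // yields (build_vector a, b, x, d).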
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // BUILD_VECTOR with undef elements and the inserted element.
+ if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ isa<ConstantSDNode>(EltNo)) {
+ EVT VT = InVec.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ EVT EltVT = InVec.getValueType().getVectorElementType();
+ SDValue InOp = InVec.getOperand(0);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ SDValue EltNo = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT VT = InVec.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ // Treat an out-of-range index (Elt >= NumElems) as undef.
+ int Idx = (Elt >= NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ InVec = InVec.getOperand(0);
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+ }
+ }
+
+ if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+ return SDValue();
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ if (Elt) {
+ unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
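+ // Mask entries referring to the second vector are offset by the element
+ // count, e.g.:
+ // (build_vector (extract_elt A, 0), (extract_elt B, 1),
+ // (extract_elt A, 2), (extract_elt B, 3))
+ // -> (vector_shuffle A, B, <0, 5, 2, 7>)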
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than an EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex >= VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+ // If the shuffle type isn't legal after type legalization, bail out.
+ if (!TLI.isTypeLegal(VT) && LegalTypes)
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+
+ assert(N0.getValueType().getVectorNumElements() == NumElts &&
+ "Vector shuffle must be normalized in DAG");
+
+ // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+ // If it is a splat, check if the argument vector is a build_vector with
+ // all scalar elements the same.
+ if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+ // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BIT_CONVERT) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElems = V->getNumOperands();
+ unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+ if (NumElems > BaseIdx) {
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+ EVT EltType = VT.getVectorElementType();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
+ EltType, LHSOp, RHSOp));
+ AddToWorkList(Ops.back().getNode());
+ assert((Ops.back().getOpcode() == ISD::UNDEF ||
+ Ops.back().getOpcode() == ISD::Constant ||
+ Ops.back().getOpcode() == ISD::ConstantFP) &&
+ "Scalar binop didn't fold!");
+ }
+
+ if (Ops.size() == LHS.getNumOperands()) {
+ EVT VT = LHS.getValueType();
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD &&
+ // Do not let this transformation reduce the number of volatile loads.
+ !cast<LoadSDNode>(LHS)->isVolatile() &&
+ !cast<LoadSDNode>(RHS)->isVolatile() &&
+ // Token chains must be identical.
+ LHS.getOperand(0) == RHS.getOperand(0)) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // If this is an EXTLOAD, the VT's must match.
+ if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+ if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+ LLDAddrSpace = PT->getAddressSpace();
+ }
+ if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+ RLDAddrSpace = PT->getAddressSpace();
+ }
+ SDValue Addr;
+ if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+ }
+ }
+
+ if (Addr.getNode()) {
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ LLD->getChain(),
+ Addr, 0, 0,
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ LLD->getChain(), Addr, 0, 0,
+ LLD->getMemoryVT(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
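+ // The constant array below is laid out as { FV, TV }, so the select
+ // computes offset 0 when the condition is false and one element size
+ // when it is true, indexing the matching constant.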
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false,
+ Alignment);
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+ // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
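+ // For example, with i32 X and A == 4 (a single-bit constant), the code
+ // below produces (and (srl X, 29), 4): bit 31 of X lands on bit 2, so the
+ // result is 4 exactly when X < 0.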
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+ // (and (sra X, size(X)-1), A) -> "and (srl X, C2), A" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
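+ // The setcc result is zero-extended to the select result type first, so
+ // this relies on 0/1 booleans, which the check below verifies.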
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ }
+
+ // Check to see if this is the equivalent of setcc
+ // FIXME: Turn all of these into setcc if setcc is legal;
+ // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ EVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
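+ // For example, for i32 X = -5: Y = sra(-5, 31) = -1, and
+ // (-5 + -1) ^ -1 = -6 ^ -1 = 5.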
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+ N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+ EVT XType = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
+ N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+ if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+ EVT XType = N0.getValueType();
+ if (SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
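+///
+/// For example, a signed i32 divide by 3 typically lowers to a multiply-high
+/// by the magic constant 0x55555556 plus a sign-correction shift (the exact
+/// sequence chosen is target- and constant-dependent).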
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
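+///
+/// For example, an unsigned i32 divide by 3 typically lowers to a
+/// multiply-high by the magic constant 0xAAAAAAAB followed by a right shift
+/// of one (the exact sequence chosen is target- and constant-dependent).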
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ GlobalValue *&GV, void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+ // If it's adding a simple constant, then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+ : (void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+ // A frame index is the only remaining base that can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ GlobalValue *GV1, *GV2;
+ void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+ // If they have the same base address, then check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // If we know what the bases are, and they aren't identical, then we know they
+ // cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+ // If we know that SrcValue1 and SrcValue2 have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
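+ // For example, two 4-byte accesses whose source values are both 16-byte
+ // aligned, at offsets 0 and 4, can never overlap.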
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+    // There is no overlap between these relatively aligned accesses of
+    // similar size; return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
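+
+// A worked example of the interval test above (illustrative): two 4-byte
+// accesses off the same base at Offset1 = 0 and Offset2 = 4 satisfy
+// (Offset1 + Size1) <= Offset2, so isAlias returns false; at Offset2 = 2
+// neither disjointness test holds and the accesses are conservatively
+// reported as aliasing.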
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ SrcValueAlign = LD->getOriginalAlignment();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ SrcValueAlign = ST->getOriginalAlignment();
+ } else {
+ llvm_unreachable("FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+    // find more and revert to the original chain since the xform is unlikely
+    // to be profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
+
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+      // The entry token is the ideal chain operand, but it is handled in
+      // FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign);
+
+      // If the chain is an alias, then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ if (Aliases.size() == 0) {
+ // If no operands then chain to entry token.
+ return DAG.getEntryNode();
+ } else if (Aliases.size() == 1) {
+ // If a single operand then chain to it. We don't need to revisit it.
+ return Aliases[0];
+ }
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+}
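+
+// For example (illustrative; 'LD' stands for some load node being combined),
+// a caller replaces a memory node's chain operand like so:
+//
+//   SDValue BetterChain = FindBetterChain(LD, LD->getChain());
+//
+// The result is the entry node when nothing aliases, the lone aliasing node
+// when there is exactly one, and a fresh TokenFactor of all aliases
+// otherwise.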
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+  // Construct a DAGCombiner and run it over the entire DAG.
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..35ef5b7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,992 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1 to a legal type though, because it's common and easy.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ }
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (!SelectOperator(CE, CE->getOpcode())) return 0;
+ Reg = LocalValueMap[CE];
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0)
+ LocalValueMap[V] = Reg;
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previous
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return Reg;
+ }
+
+ unsigned &AssignedReg = ValueMap[I];
+ if (AssignedReg == 0)
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+ TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+ Reg, RegClass, RegClass);
+ }
+ return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return 0;
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
+ else if (IdxVT.bitsGT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
+ return IdxN;
+}
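+
+// For example, on a target with 64-bit pointers an i32 GEP index is widened
+// here with ISD::SIGN_EXTEND (GEP indices are signed), while on a 32-bit
+// target an i64 index is narrowed with ISD::TRUNCATE, so the address
+// arithmetic in SelectGetElementPtr always happens at pointer width.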
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CI->getZExtValue());
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op1);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ const Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs = TD.getTypeAllocSize(Ty) * CI->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ unsigned IdxN = getRegForGEPIndex(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
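+
+// A worked example of the loop above (illustrative layout, assuming 4-byte
+// i32): for
+//
+//   getelementptr { i32, i32 }* %p, i32 0, i32 1
+//
+// the leading zero index is skipped by the constant-subscript fast path, and
+// the struct field index 1 contributes getElementOffset(1) == 4, so a single
+// ADD of 4 is emitted via FastEmit_ri_.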
+
+bool FastISel::SelectCall(User *I) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ switch (IID) {
+ default: break;
+ case Intrinsic::dbg_declare: {
+ DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+        !DW || !DW->ShouldEmitDwarfDebug())
+ return true;
+
+ Value *Address = DI->getAddress();
+ if (!Address)
+ return true;
+ AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) break;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI == StaticAllocaMap.end()) break; // VLAs.
+ int FI = SI->second;
+ if (MMI) {
+ if (MDNode *Dbg = DI->getMetadata("dbg"))
+ MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+ }
+ // Building the map above is target independent. Generating DBG_VALUE
+ // inline is target dependent; do this now.
+ (void)TargetSelectInstruction(cast<Instruction>(I));
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ EVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Reg, RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+      InsertedCopy = InsertedCopy; // Silence compiler warning.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ break;
+ }
+ case Intrinsic::eh_selector: {
+ EVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ if (MMI) {
+ if (MBB->isLandingPad())
+ AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
+ else {
+#ifndef NDEBUG
+ CatchInfoLost.insert(cast<CallInst>(I));
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
+ RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+        InsertedCopy = InsertedCopy; // Silence compiler warning.
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ } else {
+ unsigned ResultReg =
+ getRegForValue(Constant::getNullValue(I->getType()));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+}
+
+bool FastISel::SelectCast(User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal. Or as a special case,
+ // it may be i1 if we're doing a truncate because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(DstVT))
+ if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the source operand is legal. Or as a special case,
+ // it may be i1 if we're doing zero-extension because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(SrcVT))
+ if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // If the operand is i1, arrange for the high bits in the register to be zero.
+ if (SrcVT == MVT::i1) {
+ SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
+ InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+ if (!InputReg)
+ return false;
+ }
+ // If the result is i1, truncate to the target's type for i1 first.
+ if (DstVT == MVT::i1)
+ DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ ResultReg = createResultReg(DstClass);
+
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Op0, DstClass, SrcClass);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BIT_CONVERT, Op0);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(Instruction *I) {
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode()))
+ return true;
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (TargetSelectInstruction(I))
+ return true;
+
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+ if (MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ }
+ MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BIT_CONVERT, OpReg);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BIT_CONVERT, IntResultReg);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
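+
+// Illustrative note: for f64 the mask built above is UINT64_C(1) << 63, i.e.
+// 0x8000000000000000, so the bitcast / xor / bitcast sequence flips only the
+// sign bit, which matches IEEE negation (including for NaNs and zeros).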
+
+bool
+FastISel::SelectOperator(User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc);
+ return true;
+ }
+
+    // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::PHI:
+ // PHI nodes are already emitted.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : MBB(0),
+ ValueMap(vm),
+ MBBMap(bm),
+ StaticAllocaMap(am),
+#ifndef NDEBUG
+ CatchInfoLost(cil),
+#endif
+ MF(mf),
+ MMI(mmi),
+ DW(dw),
+ MRI(MF.getRegInfo()),
+ MFI(*MF.getFrameInfo()),
+ MCP(*MF.getConstantPool()),
+ TM(MF.getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned, unsigned /*Op0*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned, unsigned /*Op0*/,
+                              unsigned /*Op1*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned, unsigned /*Op0*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned, unsigned /*Op0*/,
+ ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, unsigned /*Op1*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, uint64_t Imm,
+ MVT ImmType) {
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0)
+ return 0;
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
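+
+// Illustrative usage (hypothetical operands): an i32 add of the constant 42
+// would be requested as
+//
+//   unsigned R = FastEmit_ri_(MVT::i32, ISD::ADD, Op0, 42, MVT::i32);
+//
+// which falls back to materializing 42 in a register followed by a
+// register-register ADD when the target has no reg/imm form.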
+
+/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
+ unsigned Op0, ConstantFP *FPImm,
+ MVT ImmType) {
+ // First check if immediate type is legal. If not, we can't use the rf form.
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ if (ResultReg != 0)
+ return ResultReg;
+
+ // Materialize the constant in a register.
+ unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+ if (MaterialReg == 0) {
+ // If the target doesn't have a way to directly enter a floating-point
+ // value into a register, use an alternate approach.
+ // TODO: The current approach only supports floating-point constants
+ // that can be constructed by conversion from integer values. This should
+ // be replaced by code that creates a load from a constant-pool entry,
+ // which will require some target-specific work.
+ const APFloat &Flt = FPImm->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (!isExact)
+ return 0;
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::Constant, IntVal.getZExtValue());
+ if (IntegerReg == 0)
+ return 0;
+ MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
+ ISD::SINT_TO_FP, IntegerReg);
+ if (MaterialReg == 0)
+ return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(MBB, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, uint32_t Idx) {
+ const TargetRegisterClass* RC = MRI.getRegClass(Op0);
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000..50f4c32
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,354 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+  // Base case: a non-aggregate type occupies a single flattened slot.
+ return CurIndex + 1;
+}
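+
+// A worked example (illustrative only): for the aggregate type
+//
+//   { i32, { i32, i32 }, i32 }
+//
+// the flattened member order is 0, 1, 2, 3, so the index sequence {1, 1}
+// (the second field of the inner struct) linearizes to 2; with
+// const unsigned Idx[] = {1, 1}, ComputeLinearIndex(TLI, Ty, Idx, Idx + 2)
+// returns 2.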
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
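+
+// A worked example (illustrative, assuming 4-byte i32 and float with no
+// padding): for the type { i32, [2 x float] }, the recursion above appends
+// MVT::i32, MVT::f32 and MVT::f32 to ValueVTs, with offsets 0, 4 and 8 when
+// an Offsets vector is supplied.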
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+ if (isa<PHINode>(I)) return true;
+ BasicBlock *BB = I->getParent();
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ return true;
+ return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ // Don't force virtual registers for byval arguments though, because
+ // fast-isel can't handle those in all cases.
+ if (EnableFastISel && !A->hasByValAttr())
+ return A->use_empty();
+
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ return false; // Use not in entry block.
+ return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+ bool EnableFastISel) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
+ for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+ AI != E; ++AI)
+ if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+ InitializeRegForValue(AI);
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ PHINode *PN;
+ DebugLoc DL;
+ for (BasicBlock::iterator
+ I = BB->begin(), E = BB->end(); I != E; ++I) {
+
+ PN = dyn_cast<PHINode>(I);
+ if (!PN || PN->use_empty()) continue;
+
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+}
+
+unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, V->getType(), ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = MakeReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
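+
+// For example (illustrative): on a 32-bit target where i64 is expanded into
+// two i32 registers, a value of type { i64, i32 } receives three consecutive
+// virtual registers here -- two for the i64 member and one for the i32
+// member -- and the number of the first one is returned.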
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ assert ((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<GlobalVariable *> TyInfo;
+ unsigned N = I.getNumOperands();
+
+ for (unsigned i = N - 1; i > 2; --i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert (FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 3) {
+ TyInfo.reserve(N - 3);
+ for (unsigned j = 3; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
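+
+// Illustrative operand layout for the scan above (hypothetical IR; the
+// callee is operand 0 and the exception value is operand 1, which is why
+// the personality is fetched from operand 2):
+//
+//   call i32 @llvm.eh.selector(i8* %exn, i8* @personality,
+//                              i8* @TypeInfoA, i8* @TypeInfoB, i32 0)
+//
+// The trailing i32 0 is a zero-length filter marking a cleanup; the
+// typeinfos in front of it (@TypeInfoA, @TypeInfoB) are recorded as catches
+// by the N > 3 case at the end of the function.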
+
+void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+ if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to DestBB.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SrcBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
new file mode 100644
index 0000000..d851e64
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -0,0 +1,151 @@
+//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FUNCTIONLOWERINGINFO_H
+#define FUNCTIONLOWERINGINFO_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetLowering;
+class Value;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+ TargetLowering &TLI;
+ Function *Fn;
+ MachineFunction *MF;
+ MachineRegisterInfo *RegInfo;
+
+ /// CanLowerReturn - true iff the function's return value can be lowered to
+ /// registers.
+ bool CanLowerReturn;
+
+ /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+ /// allocated to hold a pointer to the hidden sret parameter.
+ unsigned DemoteRegister;
+
+ explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+ /// set - Initialize this FunctionLoweringInfo with the given Function
+ /// and its associated MachineFunction.
+ ///
+ void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
+ /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+ DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+ /// ValueMap - Since we emit code for the function a basic block at a time,
+ /// we must remember which virtual registers hold the values for
+ /// cross-basic-block values.
+ DenseMap<const Value*, unsigned> ValueMap;
+
+ /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+ /// the entry block. This allows the allocas to be efficiently referenced
+ /// anywhere in the function.
+ DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+ SmallSet<Instruction*, 8> CatchInfoLost;
+ SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+ unsigned MakeReg(EVT VT);
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
+
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+
+ struct LiveOutInfo {
+ unsigned NumSignBits;
+ APInt KnownOne, KnownZero;
+ LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+ };
+
+ /// LiveOutRegInfo - Information about live out vregs, indexed by their
+ /// register number offset by 'FirstVirtualRegister'.
+ std::vector<LiveOutInfo> LiveOutRegInfo;
+
+ /// clear - Clear out all the function-specific state. This returns this
+ /// FunctionLoweringInfo to an empty state, ready to be used for a
+ /// different function.
+ void clear();
+};
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+
+/// CopyCatchInfo - Copy catch information from SrcBB to DestBB.
+void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 0000000..02fe85d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,701 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The value list of a target node has the register or
+/// immediate results first, then an optional chain, and optional flag
+/// results (which do not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Flag)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The operand list of a target node has the actual inputs
+/// first, followed by an optional chain operand, then an optional flag
+/// operand. Compute the number of actual operands that will go into the
+/// resulting MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
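+
+// Hedged illustration of the two counts above (not part of this change):
+// a target node with value list (i32, i32, MVT::Other, MVT::Flag) has
+// CountResults(Node) == 2, and an operand list ending in a chain and a
+// flag is likewise trimmed by CountOperands.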
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ EVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Flag)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands())
+ RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ EVT VT = Node->getValueType(ResNo);
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
+ DstRC, SrcRC);
+
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of node is a CopyToReg, return its destination register. Return 0
+/// otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(MBB, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ bool isOptDef = IIOpNum < TID.getNumOperands() &&
+ TID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
+ if (II) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
+ assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ VReg = NewVReg;
+ }
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+ JT->getTargetFlags()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+ CP->getTargetFlags()));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+ ES->getTargetFlags()));
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+ BA->getTargetFlags()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a super-register
+/// whose "SubIdx"'th sub-register class is the specified register class and
+/// whose type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, EVT VT) {
+ // Pick the register class of the superregister for this type
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
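+
+// Hedged illustration: if TRC were a 32-bit class and SubIdx named the low
+// 32 bits of a 64-bit register (a hypothetical GR32/GR64 pair on an
+// x86-64-like target), this would walk GR32's super-register classes and
+// return GR64, since GR64's SubIdx'th sub-register class is GR32.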
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap){
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EXTRACT_SUBREG));
+
+ // Figure out the subregister class for the source operand.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Add def, source, and subreg index
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx,
+ Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap);
+ // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
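+
+// Sketch of the emitted operand forms (illustrative register names only):
+//   %dst = EXTRACT_SUBREG %src, subidx
+//   %dst = INSERT_SUBREG %src, %subreg, subidx
+//   %dst = SUBREG_TO_REG imm, %subreg, subidx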
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ // If this node corresponds to a machine instruction, emit it directly.
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ if (II.usesCustomInsertionHook()) {
+ // Insert this instruction into the basic block using a target
+ // specific inserter which may return a new basic block.
+ MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+ InsertPos = MBB->end();
+ } else {
+ MBB->insert(InsertPos, MI);
+ }
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ // If there are no uses, mark the register as dead now, so that
+ // MachineLICM/Sink can see that it's dead. Don't do this if the
+ // node has a Flag value, for the benefit of targets still using
+ // Flag for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ MI->addRegisterDead(Reg, TRI);
+ }
+ }
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+ // Get the register classes of the src/dst.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ SrcTRC = MRI->getRegClass(SrcReg);
+ else
+ SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
+
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ DstTRC = MRI->getRegClass(DestReg);
+ else
+ DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+ Node->getOperand(1).getValueType());
+
+ bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
+ DstTRC, SrcTRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
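+
+ // Layout summary for the loop below: operand 0 is the chain, operand 1 is
+ // the asm string, and each subsequent group is a flag word followed by its
+ // values; (Flags & 7) selects the kind handled in the switch below, and
+ // InlineAsm::getNumOperandRegisters(Flags) gives the group's size.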
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ const char *AsmStr =
+ cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
+ switch (Flags & 7) {
+ default: llvm_unreachable("Bad flags!");
+ case 2: // Def of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ break;
+ case 6: // Def of earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
+ false, false, true));
+ }
+ break;
+ case 1: // Use of register.
+ case 3: // Immediate.
+ case 4: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ break;
+ }
+ }
+ MBB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000..91817e4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,119 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result
+ /// number of node is a CopyToReg, return its destination register. Return 0
+ /// otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+public:
+ /// CountResults - The value list of a target node has the register or
+ /// immediate results first, then an optional chain, and optional flag
+ /// results (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The operand list of a target node has the actual inputs
+ /// first, followed by an optional chain operand, then flag operands.
+ /// Compute the number of actual operands that will go into the resulting
+ /// MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..78e6e4e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3071 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and branch instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates which legalized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type, either
+ /// it is already legal or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(EVT VT) const {
+ return
+ (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(EVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a
+ /// type whose vector element type is narrower than the original shuffle
+ /// type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a
+/// type whose vector element type is narrower than the original shuffle
+/// type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
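+
+// Worked example of the mask widening above (illustrative): promoting a
+// v4i32 shuffle with mask <0, 3, -1, 1> to v8i16 gives NumEltsGrowth = 2
+// and NewMask = <0, 1, 6, 7, -1, -1, 2, 3>.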
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently bottom-up (a node's operands are
+ // legalized before the node itself). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_END)
+ return Node;
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // Otherwise, we walked into a node without a chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ assert(Node && "Didn't find callseq_start for a call??");
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
+/// see if any of them can reach Dest. If none of N's operands can get to
+/// Dest, legalize them, legalize N itself, and return false; otherwise,
+/// return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot reach Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+ NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // a smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl,
+ OrigVT, DAG.getEntryNode(),
+ CPIdx, PseudoSourceValue::getConstantPool(),
+ 0, VT, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+}
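+
+// Hedged example: an f64 constant such as 1.0 that is exactly representable
+// as an f32 can be emitted as a 4-byte constant-pool entry plus an EXTLOAD
+// back to f64, provided the target reports that extending load as legal and
+// ShouldShrinkFPConstant agrees.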
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ int SVOffset = ST->getSrcValueOffset();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+ SVOffset, ST->isVolatile(), Alignment);
+ } else {
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, NULL, 0, StoredVT);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ NULL, 0, MemVT);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ MemVT, ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getSrcValue(), SVOffset + IncrementSize,
+ NewStoredVT, ST->isVolatile(), Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
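+
+// Illustrative instance of the half-size split above: an i32 store with
+// alignment 2 on a little-endian target becomes two i16 truncating stores,
+// Lo = Val at Ptr and Hi = (Val >> 16) at Ptr+2, joined by a TokenFactor.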
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ int SVOffset = LD->getSrcValueOffset();
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, LD->isVolatile(),
+ LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + Offset, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getSrcValue(), SVOffset + Offset,
+ MemVT, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0, MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ NULL, 0, LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
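+
+// Illustrative instance of the half-size split above: an i32 load with
+// alignment 2 on a little-endian target becomes Lo = ZEXTLOAD i16 from Ptr
+// and Hi = load i16 from Ptr+2, recombined as (Hi << 16) | Lo.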
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction, then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+ PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value can be wider.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
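+ // For example, inserting at index 2 into a 4-element vector produces the
+ // mask <0,1,4,3>: element 2 comes from ScVec element 0 (index NumElts).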
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't simply move it
+ // there due to phase ordering between legalized code and the dag combiner.
+ // This probably means that we need to integrate the dag combiner and
+ // legalizer together.
+ // We generally can't do this one for long doubles.
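+ // For example, 'store double 1.0' becomes 'store i64 0x3FF0000000000000',
+ // or, with only 32-bit registers, two i32 stores of 0x00000000 and
+ // 0x3FF00000 (low word first on little-endian targets).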
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3;
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ getTypeAction(MVT::i32) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ } else if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (getTypeAction(MVT::i64) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ isVolatile, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal. Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return Op;
+
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Result = Op;
+ bool isCustom = false;
+
+ // Figure out the correct action; the way to query this varies by opcode.
+ TargetLowering::LegalizeAction Action;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::VAARG:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::BUILD_VECTOR:
+ // A weird case: legalization for BUILD_VECTOR never legalizes the
+ // operands!
+ // FIXME: This really sucks... changing it isn't semantically incorrect,
+ // but it massively pessimizes the code for floating-point BUILD_VECTORs
+ // because ConstantFP operands get legalized into constant pool loads
+ // before the BUILD_VECTOR code can see them. It doesn't usually bite,
+ // though, because BUILD_VECTORs usually get lowered into other nodes
+ // which get legalized properly.
+ SimpleFinishLegalizing = false;
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SmallVector<SDValue, 8> Ops, ResultVals;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::BR:
+ case ISD::BRIND:
+ case ISD::BR_JT:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ // Branches tweak the chain to include LastCALLSEQ_END.
+ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+ LastCALLSEQ_END);
+ Ops[0] = LegalizeOp(Ops[0]);
+ LastCALLSEQ_END = DAG.getEntryNode();
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[1].getValueType().isVector())
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[2].getValueType().isVector())
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
+ Ops.size());
+ switch (Action) {
+ case TargetLowering::Legal:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Result.getValue(i));
+ break;
+ case TargetLowering::Custom:
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Tmp1);
+ else
+ ResultVals.push_back(Tmp1.getValue(i));
+ }
+ break;
+ }
+
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Result.getNode(), ResultVals);
+ break;
+ case TargetLowering::Promote:
+ PromoteNode(Result.getNode(), ResultVals);
+ break;
+ }
+ if (!ResultVals.empty()) {
+ for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+ if (ResultVals[i] != SDValue(Node, i))
+ ResultVals[i] = LegalizeOp(ResultVals[i]);
+ AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+ }
+ return ResultVals[Op.getResNo()];
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be inserted
+ // are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we legalized all of the inputs (which may have inserted
+ // libcalls) create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+ // Note that we are legalizing this call!
+ LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // will cause this node to be legalized as well and handle libcalls right.
+ if (LastCALLSEQ_END.getNode() != Node) {
+ LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag) {
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ }
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+ // This finishes up call legalization.
+ IsLegalizingCall = false;
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.getResNo());
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment) {
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp3 = Result.getOperand(0);
+ Tmp4 = Result.getOperand(1);
+ Tmp3 = LegalizeOp(Tmp3);
+ Tmp4 = LegalizeOp(Tmp4);
+ }
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.getNode()) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another vector type.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+ return Op.getResNo() ? Tmp4 : Tmp3;
+ } else {
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
+ NVT, isVolatile, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment) {
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ // f128 = EXTLOAD {f32,f64} too
+ if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
+ Node->getValueType(0) == MVT::f128)) ||
+ (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Result = DAG.getNode(ISD::FP_EXTEND, dl,
+ Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+ assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(), SrcVT,
+ LD->isVolatile(), LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
+ }
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ Result = SDValue(OptStore, 0);
+ break;
+ }
+
+ {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ EVT VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getSrcValue(), SVOffset, isVolatile,
+ Alignment);
+ break;
+ }
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(ST->getValue());
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
+ Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, NVT, isVolatile, Alignment);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT,
+ isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT, isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
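+ // The expansion is roughly:
+ //   slot = alloca VecVT; store Vec to slot
+ //   result = load [slot + Idx * sizeof(EltVT)]
+ // using an extending load when a single element is extracted.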
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+ else
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ NULL, 0, Vec.getValueType().getVectorElementType());
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If EltVT is smaller than OpVT, only store the necessary bits.
+ if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx, SV, Offset, EltVT));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx, SV, Offset));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ assert((Tmp2.getValueType() == MVT::f32 ||
+ Tmp2.getValueType() == MVT::f64) &&
+ "Ugly special-cased code!");
+ // Get the sign bit of the RHS.
+ SDValue SignBit;
+ EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+ if (isTypeLegal(IVT)) {
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ } else {
+ assert(isTypeLegal(TLI.getPointerTy()) &&
+ (TLI.getPointerTy() == MVT::i32 ||
+ TLI.getPointerTy() == MVT::i64) &&
+ "Legal type for load?!");
+ SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
+ SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+ if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
+ LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(4));
+ SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
+ Ch, LoadPtr, NULL, 0, MVT::i32);
+ }
+ SignBit =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
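+ // Inside the CALLSEQ_START/CALLSEQ_END pair, the expansion is roughly:
+ //   SP = copyfromreg SPReg
+ //   if (Align > StackAlign) SP = SP & -Align
+ //   Tmp1 = SP - Size; copytoreg SPReg, Tmp1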
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
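+ /// For example, on a target without ordered FP compares:
+ ///   SETOLT x, y --> (SETLT x, y) AND (SETO x, y)
+ ///   SETULT x, y --> (SETLT x, y) OR (SETUO x, y)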
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ EVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
+
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
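+ /// For example, converting f80 to f64 through an f64 slot emits a
+ /// truncstore of the f80 value followed by a plain f64 load.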
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ EVT SlotVT,
+ EVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ unsigned DestAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext()));
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // larger than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, SlotVT, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT == EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ }
+
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(Vec1, Vec2, ShuffleVec); for a splat this is
+ // shuffle(LowValVec, undef, <0,0,0,0>).
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
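+ //
+ // For example, expanding a 64-bit signed divide this way typically emits
+ // a call to the __divdi3 runtime function.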
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG,
+ Node->getDebugLoc(), DAG.GetOrdering(Node));
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+ /// ExpandLegalINT_TO_FP - This function is responsible for legalizing an
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ EVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
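+ // The trick: the double whose high word is 0x43300000 and whose low word
+ // is x has value 2^52 + x, since 0x433 is the exponent for 2^52 and x
+ // occupies the low mantissa bits. Subtracting the bias 2^52 (2^52 + 2^31
+ // in the signed case, where the sign bit was flipped to map the input
+ // into unsigned space) then yields x converted to double exactly.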
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) addresses based on endianness
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed, map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, NULL, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ else {
+ FudgeInReg =
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ MVT::f32, false, Alignment));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
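+ /// For example, i16 UINT_TO_FP on a target that only supports i32
+ /// SINT_TO_FP becomes SINT_TO_FP (zero_extend i32 x); the zero-extended
+ /// value is always non-negative, so the signed conversion is exact.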
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Sign- or zero-extend the
+ // input to the desired type, then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
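+ /// For example, f32 FP_TO_UINT to i16 on a target with only i32 FP_TO_SINT
+ /// becomes (truncate i16 (FP_TO_SINT i32 x)); in-range results round-trip
+ /// unchanged through the wider type.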
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+ /// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
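+ /// For i32, for example, this produces:
+ ///   (x << 24) | ((x << 8) & 0xFF0000) | ((x >> 8) & 0xFF00) | (x >> 24)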
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: llvm_unreachable("Cannot expand this yet!");
+ case ISD::CTPOP: {
+ static const uint64_t mask[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
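+ // Classic parallel bit count: pass i replaces each 2^(i+1)-bit field with
+ // the number of set bits it contains. Illustrative i8 trace for 0xD6
+ // (0b11010110, five set bits):
+ //   i = 0: (x & 0x55) + ((x >> 1) & 0x55) -> 0b10010101 (pair counts)
+ //   i = 1: (x & 0x33) + ((x >> 2) & 0x33) -> 0b00110010 (nibble counts)
+ //   i = 2: (x & 0x0F) + ((x >> 4) & 0x0F) -> 0b00000101 = 5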
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+ unsigned EltSize = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : len;
+ SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
+ Tmp2));
+ }
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
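+ // Illustrative i32 trace: for x = 0x00010000 the OR cascade smears the
+ // top set bit downward, giving 0x0001FFFF; ~x is then 0xFFFE0000, whose
+ // popcount (15) is exactly the number of leading zeros.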
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
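+ // Illustrative trace: for x = 40 (0b101000), x - 1 = 0b100111 and
+ // ~x = ...010111, so ~x & (x - 1) = 0b000111; its popcount, 3, is the
+ // number of trailing zeros.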
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
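+/// ExpandNode - Expand the specified node into a sequence of operations that
+/// are already legal, appending the replacement values to Results.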
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::MEMBARRIER:
+ case ISD::VAEND:
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else if (VT.isFloatingPoint())
+ Results.push_back(DAG.getConstantFP(0, VT));
+ else
+ llvm_unreachable("Unknown value type!");
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl, DAG.GetOrdering(Node));
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BIT_CONVERT:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
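+ // The extension is done with a shift pair: e.g. sign-extending from i8
+ // inside an i32, (x << 24) followed by an arithmetic >> 24 replicates
+ // bit 7 into bits 8-31.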
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy();
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
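+ // Expand in terms of FP_TO_SINT: inputs below 2^(N-1) convert directly;
+ // larger inputs are first reduced by 2^(N-1) and the top bit is XOR'd
+ // back into the converted result afterwards.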
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ const uint64_t zero[] = {0, 0};
+ APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getTargetData()->
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), VS, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(0),
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(1),
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(SDValue(Node, 0));
+ else
+ Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ // Expand A - B as A + (~B + 1), i.e. add the two's complement of B.
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X - (X/Y)*Y
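+ // e.g. 7 % 3 -> 7 - (7/3)*3 = 7 - 2*3 = 1.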
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned) {
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ } else {
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if the multiply can be lowered using a two-result operation.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
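+ // e.g. for i8: 100 + 100 wraps to -56; both operands are non-negative
+ // (signs match) but the sum is negative, so overflow is reported.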
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
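+ // Unsigned overflow needs no sign analysis: an ADD wrapped exactly when
+ // the result is less than an operand (Sum <u LHS), and a SUB borrowed
+ // exactly when the result exceeds the minuend (Sum >u LHS).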
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode() == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
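+ // Obtain both halves of the 2*N-bit product by the cheapest legal route,
+ // then report overflow when the top half is not the expected extension of
+ // the bottom half: its sign-fill when signed, all zeros when unsigned.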
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ llvm_unreachable("Don't know how to expand this operation yet!");
+ }
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT PairTy = Node->getValueType(0);
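+ // BUILD_PAIR(Lo, Hi) becomes zext(Lo) | (anyext(Hi) << PairBits/2).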
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
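+ // Lower SELECT into SELECT_CC: reuse the operands of an incoming SETCC
+ // condition directly, otherwise compare the condition against zero.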
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ EVT PTy = TLI.getPointerTy();
+
+ const TargetData &TD = *TLI.getTargetData();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ PseudoSourceValue::getJumpTable(), 0, MemVT);
+ Addr = LD;
+ if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp2,
+ DAG.getConstant(0, Tmp2.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(SDValue(Node, i));
+ break;
+ }
+}
+
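+/// PromoteNode - Promote the specified operation to a larger type, perform it
+/// in that type, and convert the result back down.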
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT OVT = Node->getValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
+ OVT = Node->getOperand(0).getValueType();
+ }
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // If Tmp1 == sizeinbits(NVT), the input was zero; remap the result to
+ // sizeinbits(OVT).
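+ // e.g. promoting i8 -> i32: cttz(zext(i8 0)) is 32; the select below
+ // remaps it to the correct i8 answer, 8.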
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
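+ // e.g. promoting i8 -> i32: the zext contributes 32 - 8 = 24 spurious
+ // leading zeros, which are subtracted off here.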
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
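+ // e.g. i16 -> i32: bswap(0x0000AABB) = 0xBBAA0000; shifting right by
+ // DiffBits (16) recovers the i16 result 0xBBAA.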
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (OVT.isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ llvm_report_error("Cannot promote logic operation");
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+ // Construct a one-shot legalizer instance and run it over the DAG.
+ SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..4f0fce7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1387 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
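+/// E.g. GetFPLibCall(MVT::f64, RTLIB::ADD_F32, RTLIB::ADD_F64,
+/// RTLIB::ADD_F80, RTLIB::ADD_PPCF128) yields RTLIB::ADD_F64.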
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
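+ // e.g. when f32 is softened to i32 the mask is 0x7FFFFFFF, so the AND
+ // below clears the IEEE sign bit and leaves the magnitude intact.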
+ SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
+ NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
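+ // e.g. copysign(f32, f64) softened to (i32, i64): isolate bit 63 of the
+ // i64 image, shift it down to bit 31, truncate, clear bit 31 of the i32
+ // image, and OR the pieces together.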
+ // First get the sign bit of the second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift or extend it so the sign bit lands in the LHS's sign position.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy()));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
+ NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
+ L->getMemoryVT(), L->getChain(),
+ L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(),
+ L->getMemoryVT(),
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+ // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+ : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl) {
+ SDValue LHSInt = GetSoftenedFloat(NewLHS);
+ SDValue RHSInt = GetSoftenedFloat(NewRHS);
+ EVT VT = NewLHS.getValueType();
+
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
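+ // Unordered predicates decompose into two libcalls whose results are
+ // OR'd together below: e.g. SETUEQ(x,y) == SETUO(x,y) | SETOEQ(x,y).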
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(false && "Do not know how to soften this setcc!");
+ }
+ }
+
+ // Use the target-specific return value for comparison libcalls.
+ EVT RetVT = TLI.getCmpLibcallReturnType();
+ SDValue Ops[2] = { LHSInt, RHSInt };
+ NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = TLI.getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
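+// Worked example of the two-libcall path above (illustrative; the actual
+// symbols come from the target's runtime via the RTLIB tables): softening
+// "setcc f32 a, b, setugt" picks LC1 = UO_F32 and LC2 = OGT_F32, so the
+// final value is
+//   setcc(uo(a,b), 0, CC(UO)) | setcc(gt(a,b), 0, CC(OGT))
+// i.e. SETUGT decomposes into "unordered OR ordered-greater-than", with each
+// libcall result tested against zero using the CC from getCmpLibcallCC.
+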
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+}
+
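+// Note on the truncating case above: the value is first rounded to the
+// memory type (e.g. f64 rounded to f32) and then bit-converted to the
+// same-sized integer, so the store emitted is a plain, non-truncating
+// integer store of the softened value.
+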
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[1])), NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[0])), NVT);
+}
+
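+// Illustrative note: a ppcf128 value is a pair of f64 halves whose sum is
+// the logical value, so the constant's 128-bit pattern is split into its two
+// 64-bit raw words above, and each word is reinterpreted as one f64 half
+// through APFloat. Which raw word holds which half follows the APInt word
+// order produced by bitcastToAPInt.
+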
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
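+// The SELECT_CC above encodes fabs for a ppcf128 pair (Hi, Lo) whose sum is
+// the logical value: Hi carries the sign, so Hi becomes fabs(Hi), and Lo
+// keeps its sign only if Hi was already non-negative (Hi == fabs(Hi));
+// otherwise the whole pair was negated and Lo must be negated too. E.g.
+// (Hi, Lo) = (-1.0, -2^-60) becomes (1.0, 2^-60).
+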
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ const uint64_t *Parts = 0;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
+
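+// Illustrative check of the 2^N fixup above: the hex patterns are the f64
+// bit patterns of 2^32, 2^64 and 2^128 (biased exponents 0x41f, 0x43f and
+// 0x47f, zero mantissa). For u32 x = 0xFFFFFFFF the signed conversion gives
+// -1.0, and -1.0 + 2^32 = 4294967295.0, the desired unsigned value; the
+// SELECT_CC keeps the unadjusted result whenever x is non-negative when
+// viewed as a signed value.
+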
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+ == TargetLowering::Custom)
+ Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ EVT VT = NewLHS.getValueType();
+ assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
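+// The expansion above computes, for ppcf128 operands (Hi1,Lo1) and (Hi2,Lo2):
+//   (Hi1 == Hi2 && Lo1 cc Lo2) || (Hi1 != Hi2 && Hi1 cc Hi2)
+// i.e. when the high doubles are equal the low doubles decide the
+// comparison, otherwise the high doubles alone decide it.
+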
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
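+// Note on the i32 fast path above: the ppcf128 value is first rounded to
+// double precision (FP_ROUND_INREG, then an FP_ROUND whose IntPtrConstant(1)
+// operand marks the truncation as exact) and only then converted, which is
+// safe because an f64 can represent every i32 exactly.
+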
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
+
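+// Illustrative check of the i32 fast path above: 0x41e0000000000000 is the
+// f64 bit pattern of 2^31. For X = 3e9 (which is >= 2^31) the expansion
+// computes (int)(3e9 - 2^31) + 0x80000000 = 852516352 + 2147483648 =
+// 3000000000, while values below 2^31 take the plain FP_TO_SINT path.
+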
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If FloatExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..9932cf4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2353 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case Legal:
+ break;
+ case PromoteInteger:
+ if (NOutVT.bitsEq(NInVT))
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl,
+ NOutVT, GetPromotedInteger(InOp));
+ break;
+ case SoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case ExpandInteger:
+ case ExpandFloat:
+ break;
+ case ScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ case SplitVector: {
+ // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ }
+ case WidenVector:
+ if (OutVT.bitsEq(NInVT))
+ // The input is widened to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
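+// Worked example of the shift above: bswap of an i16 promoted to i32 has
+// DiffBits = 16. For Op = 0x0000AABB, BSWAP in i32 gives 0xBBAA0000, and
+// the SRL by 16 yields 0x0000BBAA, exactly the i16 bswap of the original
+// value.
+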
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ JoinIntegers(N->getOperand(0), N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but in practice this tends to give
+ // better code.
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
+
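+// Worked example of the subtraction above: ctlz of i8 0x10 promoted to i32.
+// Zero extension gives 0x00000010, CTLZ in i32 returns 27, and subtracting
+// the 32 - 8 = 24 extra leading zero bits yields 3, the correct i8 count
+// (0x10 = 0b00010000 has three leading zeros).
+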
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.set(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
+
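+// The OR above plants a guard bit one position above the original width:
+// for i8 promoted to i32 it sets bit 8, so a zero input yields CTTZ = 8,
+// the defined result for an i8 zero, while any nonzero input already has a
+// set bit below bit 8 and is unaffected by the guard.
+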
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
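+// Worked example of the overflow test above: i8 saddo of 100 + 100 promoted
+// to i32 computes Res = 200. Sign-extending the low 8 bits of 200 (0xC8)
+// gives -56, which differs from 200, so overflow is reported; for 50 + 50
+// the result 100 survives the in-register sign extension unchanged and no
+// overflow is flagged.
+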
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+ Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+
+ switch (getTypeAction(N->getOperand(0).getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case Legal:
+ case ExpandInteger:
+ Res = N->getOperand(0);
+ break;
+ case PromoteInteger:
+ Res = GetPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ assert(ResNo == 1 && "Only boolean result promotion currently supported!");
+ return PromoteIntRes_Overflow(N);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
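+// Worked example of the signed/unsigned split above: comparing i8 -1 with
+// i8 1 under SETLT. Sign extension preserves the ordering (-1 < 1), whereas
+// zero extension would compare 255 with 1 and invert the result. For
+// equality and the unsigned predicates either extension is correct, so the
+// cheaper zero extension is used.
+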
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
+ N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
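+
+// Worked example for PromoteIntOp_BUILD_PAIR above (illustrative): packing
+// two i32 halves into a legal i64 becomes
+//   Lo = zext i32 Lo to i64, Hi = anyext i32 Hi to i64
+//   result = or i64 (shl Hi, 32), Lo
+// The zero extension of Lo is what makes the OR correct.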
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+  // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+  // Promote the element values. Their types do not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2));
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewOps,
+ array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ SExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
+ SVOffset, N->getMemoryVT(),
+ isVolatile, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ ZExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
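+
+// Illustrative sketch (assuming i16 promotes to i32): zero extending a
+// promoted i16 operand to an i32 result becomes an any_extend of the
+// promoted value followed by getZeroExtendInReg, i.e. an AND with 0xFFFF
+// to clear whatever garbage the promotion left in the high bits.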
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, dl,
+ NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, dl,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
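+
+// Worked example for ExpandShiftByConstant above (illustrative, assuming an
+// i64 shift expanded into i32 halves, so VTBits == 64 and NVTBits == 32):
+//   shl x, 40:  Lo = 0,         Hi = InL << 8                 (Amt > NVTBits)
+//   shl x, 32:  Lo = 0,         Hi = InL                      (Amt == NVTBits)
+//   shl x, 8:   Lo = InL << 8,  Hi = (InH << 8) | (InL >> 24) (general case)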
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that the amount is >= NVTBits or < NVTBits (e.g. 32
+/// when expanding to 32-bit halves), without knowing the actual shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+#if 0
+ // FIXME: This code is broken for shifts with a zero amount!
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Compute 32-amt.
+ SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+ DAG.getConstant(NVTBits, ShTy),
+ Amt);
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(Op1, NVT, InH, Amt),
+ DAG.getNode(Op2, NVT, InL, Amt2));
+ return true;
+ }
+#endif
+
+ return false;
+}
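+
+// Illustrative example: if bit 5 of the amount of an expanded i64 shl is
+// known to be one (so Amt >= 32), the low half of the result is zero and the
+// high half is InL << (Amt & 31), i.e. a single narrow shift suffices.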
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+
+ return false;
+}
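+
+// Sketch of the SHL case above (illustrative, NVTBits == 32):
+//   isShort = Amt <u 32
+//   Lo = isShort ? (InL << Amt) : 0
+//   Hi = isShort ? (InH << Amt) | (InL >> (32 - Amt)) : (InL << (Amt - 32))
+// As the FIXMEs note, the (32 - Amt) shift is undefined for Amt == 0 on some
+// targets, which is why this expansion is only a last resort.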
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ } else {
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+ }
+}
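+
+// Illustrative sketch of the no-ADDC path above, for an expanded add:
+//   Lo = LoOps[0] + LoOps[1]                        // mod 2^NVTBits
+//   carry = (Lo <u LoOps[0]) || (Lo <u LoOps[1])    // unsigned wraparound
+//   Hi = HiOps[0] + HiOps[1] + (carry ? 1 : 0)
+// The carry is materialized with setcc+select because, as the TODO above
+// notes, there is no way to produce an MVT::Flag value in expanded code.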
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so it will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
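+
+// Worked example (illustrative): bswap of i64 0x0102030405060708 in i32
+// halves. The halves are swapped as they are read (Lo gets the old Hi),
+// then each is byte-swapped: Lo = bswap(0x01020304) = 0x04030201 and
+// Hi = bswap(0x05060708) = 0x08070605.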
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+ Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
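+
+// Worked example (illustrative): expanding the i64 constant
+// 0x0123456789ABCDEF into i32 halves gives
+//   Lo = trunc(Cst)        = 0x89ABCDEF
+//   Hi = trunc(Cst >> 32)  = 0x01234567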
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+  // ctlz(HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
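+
+// Worked example (illustrative, i64 in i32 halves): for an input with
+// Hi == 0 and Lo == 0x00F00000, the select takes the Hi-is-zero arm and
+// the count is ctlz(Lo) + 32 = 8 + 32 = 40. The expanded Hi result is
+// always zero because the count itself fits in the low half.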
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+  // cttz(HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+  SDValue HiTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoTZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, HiTZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ MemVT, isVolatile, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
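+
+// Sketch of the schoolbook step above (illustrative): with n == NVTBits,
+// write L = LH*2^n + LL and R = RH*2^n + RL. Then, modulo 2^(2n),
+//   Lo = low(LL*RL)
+//   Hi = high(LL*RL) + LL*RH + LH*RL       (each product mod 2^n)
+// which is exactly the umul_lohi/mulhu fallback emitted above; the LH*RH
+// term only affects bits >= 2n and is dropped.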
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+  // Next check to see if the target supports the _PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+ SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+ EVT VT = LHSL.getValueType();
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so it will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so it will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, x < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, return the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
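+
+// Illustrative summary of the general case above: an expanded signed
+// comparison a < b becomes
+//   (aHi == bHi) ? (aLo <u bLo) : (aHi <s bHi)
+// i.e. the low halves are compared unsigned, the high halves with the
+// original signedness, and high-half equality selects between the two.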
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which is a problem on 8/16-bit targets. Just truncate the constant to a
+  // valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ N->getMemoryVT(), isVolatile, Alignment);
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset, HiVT, isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
+ TargetLowering::Custom) {
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
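+ // These are the f32 bit patterns of 2^32 and 2^64; 2^128 is not
+ // representable in f32, so its pattern here is the +infinity encoding.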
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ assert(false && "Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr, NULL, 0, MVT::f32,
+ false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000..37f36a3
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1102 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may momentarily fail to hold while a node is
+ // being processed: the node may be put in a map before being marked
+ // Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ }
+
+ // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case PromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case WidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case PromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case SoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case SplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case WidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't mark it
+ // processed or add its users to the worklist yet.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
+ NewOps.size()).getNode();
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
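+ // It is the number of operands that are still unprocessed; when it is
+ // zero the node is ReadyToProcess and goes on the worklist.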
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Node updates can mean pretty much anything. It is possible that an
+ // operand was set to something already processed (for example), in which
+ // case this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW update!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe to
+ // skip. Note that this is not a morphing node - otherwise it would still
+ // be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new node
+ // instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// The last parameter is TRUE if we are dealing with a node with illegal
+/// result types; the second parameter then denotes the type of the illegal
+/// ResNo. If the last parameter is FALSE, the node has legal result types
+/// but an illegal operand; the second parameter then denotes the type of
+/// the illegal OperandNo.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ } else {
+ unsigned NumElements = InVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
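+/// For example, joining two i32 halves produces an i64 equal to
+/// (zext Lo) | ((anyext Hi) << 32).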
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl,
+ DAG.GetOrdering(DAG.getEntryNode().getNode()));
+ return CallInfo.first;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode;
+ switch (TLI.getBooleanContents()) {
+ default:
+ assert(false && "Unknown BooleanContent!");
+ case TargetLowering::UndefinedBooleanContent:
+ // Extend to VT by adding rubbish bits.
+ ExtendCode = ISD::ANY_EXTEND;
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // Extend to VT by adding zero bits.
+ ExtendCode = ISD::ZERO_EXTEND;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+ // Extend to VT by copying the sign bit.
+ ExtendCode = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
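+/// For example, splitting an i64 with LoVT = HiVT = i32 gives
+/// Lo = trunc Op and Hi = trunc(Op >> 32).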
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..b0af357
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,736 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared among the files that implement the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class VISIBILITY_HIDDEN DAGTypeLegalizer {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+ // about the state of the node. The enum below lists the possible states.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not before seen, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
+private:
+ enum LegalizeAction {
+ Legal, // The target natively supports this type.
+ PromoteInteger, // Replace this integer type with a larger one.
+ ExpandInteger, // Split this integer type into two of half the size.
+ SoftenFloat, // Convert this float type to a same size integer type.
+ ExpandFloat, // Split this float type into two of half the size.
+ ScalarizeVector, // Replace this one-element vector with its element type.
+ SplitVector, // Split this vector type into two of half the size.
+ WidenVector // This vector type should be widened into a larger vector.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ LegalizeAction getTypeAction(EVT VT) const {
+ switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
+ default:
+ assert(false && "Unknown legalize action!");
+ case TargetLowering::Legal:
+ return Legal;
+ case TargetLowering::Promote:
+ // Promote can mean
+ // 1) For integers, use a larger integer type (e.g. i8 -> i32).
+ // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+ if (!VT.isVector())
+ return PromoteInteger;
+ else
+ return WidenVector;
+ case TargetLowering::Expand:
+ // Expand can mean
+ // 1) split scalar in half, 2) convert a float to an integer,
+ // 3) scalarize a single-element vector, 4) split a vector in two.
+ if (!VT.isVector()) {
+ if (VT.isInteger())
+ return ExpandInteger;
+ else if (VT.getSizeInBits() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
+ return SoftenFloat;
+ else
+ return ExpandFloat;
+ } else if (VT.getVectorNumElements() == 1) {
+ return ScalarizeVector;
+ } else {
+ return SplitVector;
+ }
+ }
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::Legal);
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ DenseMap<SDValue, SDValue> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ DenseMap<SDValue, SDValue> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+ /// SplitVectors - For nodes that need to be split this map indicates
+ /// which operands are the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ DenseMap<SDValue, SDValue> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ DenseMap<SDValue, SDValue> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+ SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ }
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
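Bitwise operations are the simplest consumers of this interface, since each half of the result depends only on the corresponding halves of the operands. A rough sketch of what ExpandIntRes_Logical (declared below) amounts to, not necessarily this commit's exact body:

void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
                                            SDValue &Lo, SDValue &Hi) {
  DebugLoc dl = N->getDebugLoc();
  SDValue LL, LH, RL, RH;
  GetExpandedInteger(N->getOperand(0), LL, LH);  // e.g. i64 -> two i32s
  GetExpandedInteger(N->getOperand(1), RL, RH);
  // AND/OR/XOR act independently on the two halves.
  Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
  Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
}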
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
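Most softened operations end up as runtime library calls on the integer bits. As an illustration, a handler such as SoftenFloatRes_FADD (declared below) plausibly reduces to the following sketch, using the legalizer's MakeLibCall/GetFPLibCall helpers; the compiler-rt routines __addsf3, __adddf3 and friends do the actual arithmetic.

SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
                     GetSoftenedFloat(N->getOperand(1)) };
  // Pick the libcall matching the original float type (__addsf3 for f32,
  // __adddf3 for f64, ...) and call it on the integer-typed bits.
  return MakeLibCall(GetFPLibCall(N->getValueType(0),
                                  RTLIB::ADD_F32, RTLIB::ADD_F64,
                                  RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
                     NVT, Ops, 2, false, N->getDebugLoc());
}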
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
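Since a ppcf128 value is by definition the sum of its two f64 halves, a few operations expand without any libcall; negation is the clearest case, because negating both halves negates the sum. A sketch of that handler (the body in this commit may differ in detail):

void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
  DebugLoc dl = N->getDebugLoc();
  GetExpandedFloat(N->getOperand(0), Lo, Hi);
  // -(Hi + Lo) == (-Hi) + (-Lo), so negate each half independently.
  Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
  Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
}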
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
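Elementwise operations split trivially, because each half of the result depends only on the matching halves of the operands. Roughly, SplitVecRes_BinOp (declared below) comes down to this sketch:

void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
                                         SDValue &Hi) {
  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);  // e.g. v8i32 -> 2 x v4i32
  GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
  DebugLoc dl = N->getDebugLoc();
  // Apply the original operation to each pair of halves.
  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}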
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
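For most arithmetic the rubbish lanes are harmless: rubbish inputs produce rubbish outputs in lanes nobody reads. So in the simple case, widening a binary operation just performs it on the whole widened vector, as in this sketch (operations that can trap, such as integer division, need more care than shown here):

SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
                                         N->getValueType(0));
  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
  // The extra lanes compute rubbish from rubbish, which is fine.
  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
                     WidenVT, InOp1, InOp2);
}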
+
+ // Widen Vector Result.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads that
+ /// load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads to be generated.
+ /// LD: the load to widen
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads that load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads to be generated.
+ /// LD: the load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores that
+ /// store a widened vector into non-widened memory. It takes
+ /// StChain: list of chains for the stores that have been generated
+ /// ST: store of a widened value
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// GenWidenVectorTruncStores - Helper function to generate a set of
+ /// truncating stores that store a widened vector into non-widened memory.
+ /// StChain: list of chains for the stores that have been generated
+ /// ST: store of a widened value
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+ /// ModifyToType - Modifies a vector input (widens or narrows) to a vector
+ /// of type WidenVT. The input vector must have the same element type as
+ /// WidenVT.
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
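Conceptually, and assuming power-of-two vector sizes for simplicity (the real routine also handles the awkward cases), ModifyToType behaves as follows:

// Illustrative only:
//   widening:  v2i32 -> v4i32, e.g. CONCAT_VECTORS(In, undef:v2i32),
//              the original elements followed by undef padding;
//   narrowing: v4i32 -> v2i32, e.g. EXTRACT_SUBVECTOR(In, 0), keeping
//              the leading elements and dropping the rest.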
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two not necessarily identical types. As such they can be used
+ // for splitting vectors and for expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two identical types of half the size, and that the Lo/Hi part
+ // is stored first in memory on little/big-endian machines, followed by the
+ // Hi/Lo part. As such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
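To make the memory-layout convention concrete: expanding the i64 constant 0x0123456789ABCDEF gives Lo = 0x89ABCDEF and Hi = 0x01234567, and storing the halves in the stated order reproduces the in-memory bytes of the original i64.

little-endian: EF CD AB 89 | 67 45 23 01   (Lo stored first, then Hi)
big-endian:    01 23 45 67 | 89 AB CD EF   (Hi stored first, then Lo)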
+
+ // Generic Result Expansion.
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000..a1b6ced
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,470 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ break;
+ case SoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case ExpandInteger:
+ case ExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case SplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case ScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case WidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ InOp = GetWidenedVector(InOp);
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+ // is legal but the result is not.
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(1));
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+ getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset+IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ int SVOffset = St->getSrcValueOffset();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
+ SVOffset + IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // A MERGE_VALUES node can produce any number of values. We know that the
+ // first illegal one needs to be expanded into Lo/Hi.
+ unsigned i;
+
+ // The leading run of legal results is replaced directly by the
+ // corresponding input operands, which have the same types.
+ for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+ // The first illegal result must be the one that needs to be expanded.
+ GetSplitOp(N->getOperand(i), Lo, Hi);
+
+ // Replace the remaining results with the corresponding input operands,
+ // whether those are legal or not.
+ unsigned e = N->getNumValues();
+ for (++i; i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+ Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000..b5f84c0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,290 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
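To illustrate the unrolling in the v2i64 example above: conceptually, DAG.UnrollVectorOp rewrites the single vector node into per-element scalar work, roughly as follows.

// v2i64 %r = sdiv %a, %b   becomes (schematically):
//   %a0 = extract_vector_elt %a, 0    %b0 = extract_vector_elt %b, 0
//   %a1 = extract_vector_elt %a, 1    %b1 = extract_vector_elt %b, 1
//   %r0 = sdiv i64 %a0, %b0           %r1 = sdiv i64 %a1, %b1
//   %r  = build_vector %r0, %r1
// On x86-32 the scalar i64 sdivs are themselves illegal and are later
// expanded by the type legalizer (typically into __divdi3 libcalls).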
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ TargetLowering& TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+ // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+ // isn't legal.
+ SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::VSETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ case TargetLowering::Promote:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::VSETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = DAG.UnrollVectorOp(Op.getNode());
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ EVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
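+ // Use -0.0, not +0.0: (-0.0 - x) negates x for every input, including
+ // x == +0.0, where 0.0 - 0.0 would yield +0.0 instead of the required
+ // -0.0.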
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..bf95bb5
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2364 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize the result of this operator!");
+
+ case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getOriginalAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
+
+ // VSETCC always returns a sign-extended value, while SETCC may not. The
+ // SETCC result type may not match the vector element type. Correct these.
+ if (NVT.bitsLE(SVT)) {
+    // The SETCC result type is at least as big as the vector element type.
+ // Ensure the SETCC result is sign-extended.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
+ DAG.getValueType(MVT::i1));
+ // Truncate to the final type.
+ return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
+ }
+
+ // The SETCC result type is smaller than the vector element type.
+  // If the SETCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+}
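+
+// Worked example for the first case above: assume a v1i8 VSETCC whose
+// operands were scalarized to i8, on a target where getSetCCResultType()
+// is i32 with ZeroOrOneBooleanContent. The scalar SETCC yields an i32 that
+// is 0 or 1, so it is sign-extended in-register from bit 0 (giving 0 or -1)
+// and then truncated to the i8 element type.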
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BIT_CONVERT:
+ Res = ScalarizeVecOp_BIT_CONVERT(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
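+
+// For example, concatenating four v1i16 values whose scalarized elements
+// are %e0..%e3 becomes
+//   v4i16 = BUILD_VECTOR %e0, %e1, %e2, %e3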
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->isVolatile(), N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization, we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ case ISD::VSETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
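+
+// For example, assuming GetSplitDestVTs halves the type, a v4i32 ADD
+// becomes two independent v2i32 ADDs:
+//   Lo: v2i32 = add LHSLo, RHSLo
+//   Hi: v2i32 = add LHSHi, RHSHi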
+
+void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ case SoftenFloat:
+ case ScalarizeVector:
+ break;
+ case ExpandInteger:
+ case ExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case SplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+}
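+
+// For example, assuming a 32-bit target where i128 is expanded, splitting
+//   v4i32 = BIT_CONVERT i128 %x
+// takes the ExpandInteger path above: %x is expanded into two i64 halves
+// (swapped on big-endian targets), and each half is bit-converted to one
+// v2i32 part of the result.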
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ // Split the input.
+ SDValue VLo, VHi;
+ EVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: llvm_unreachable("Unexpected type action!");
+ case Legal: {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), VLo, VHi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
+
+ Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
+ DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+}
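+
+// For example, splitting
+//   v4i32 = EXTRACT_SUBVECTOR v16i32 %vec, %idx
+// into two v2i32 halves extracts Lo at %idx and Hi at %idx + 2, since the
+// low half holds two elements of the original subvector.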
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  unsigned Alignment = TLI.getTargetData()->
+    getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
+ MinAlign(Alignment, IncrementSize));
+}
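+
+// When the index is not a constant, the code above falls back to memory:
+// the vector is spilled to a stack slot, the element is written through a
+// truncating store at the computed element address, and the Lo/Hi halves
+// are reloaded. E.g., for a v4i32 vector the Hi reload reads 8 bytes past
+// StackPtr, since the Lo half occupies two i32 elements.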
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const Value *SV = LD->getSrcValue();
+ int SVOffset = LD->getSrcValueOffset();
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+ SV, SVOffset, LoMemVT, isVolatile, Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+ Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+ SV, SVOffset, HiMemVT, isVolatile, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
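+
+// For example, a v4i32 load splits into a v2i32 load at Ptr and a second
+// v2i32 load at Ptr + 8 bytes (LoMemVT is 64 bits), with a TokenFactor
+// joining the two chains so existing users of the old chain see a single
+// chain result.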
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: llvm_unreachable("Unexpected type action!");
+ case Legal: {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                           &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
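+
+// Worked example: splitting a v4i32 shuffle with mask <0, 4, 1, 5> gives
+// four v2i32 inputs (the Lo/Hi halves of each operand). For the Lo output,
+// mask elements 0 and 4 land in Inputs[0] and Inputs[2], so InputUsed
+// becomes {0, 2} and the new two-input shuffle mask is <0, 2>. Only when a
+// half would need more than two of the four inputs does the code fall back
+// to the BUILD_VECTOR path.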
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split this operator's operand!");
+
+ case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
+  // For example, i64 = BIT_CONVERT v4i16 on Alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
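+
+// For the i64 = BIT_CONVERT v4i16 example above: the v4i16 operand splits
+// into two v2i16 halves, each half is bit-converted to an i32, and
+// JoinIntegers pastes the two i32 values back together into the i64 result
+// (with the halves swapped first on big-endian targets).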
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal. For now, assume the index
+ // is a constant.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType()));
+ }
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ SV, 0, EltVT);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc dl = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ LoMemVT, isVol, Alignment);
+ else
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVol, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ HiMemVT, isVol, Alignment);
+ else
+ Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ isVol, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
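+
+// For example, storing a split v4i32 value emits a v2i32 store at Ptr and
+// a v2i32 store at Ptr + 8 bytes, returning a TokenFactor of the two store
+// chains. Truncating stores are split the same way using LoMemVT/HiMemVT.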
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::VSETCC:
+ Res = WidenVecRes_VSETCC(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FPOWI:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ Res = WidenVecRes_Unary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+}
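+
+// For example, assuming the target widens v3i32 to v4i32, a v3i32 ADD
+// becomes a v4i32 ADD of the widened operands; the extra lane carries an
+// undefined value that no use of the original v3i32 result observes.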
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+    // The result and the input are different vector types, so widening the
+    // result could yield a legal type while widening the input could yield
+    // an illegal one, leading to the input being repeatedly split and then
+    // widened. To avoid this, widen the input only if doing so results in a
+    // legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+ &Ops[0], NumConcat));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0)));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == WidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+  default:
+    llvm_unreachable("Unknown type action!");
+ case Legal:
+ break;
+ case PromoteInteger:
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ case SoftenFloat:
+ case ExpandInteger:
+ case ExpandFloat:
+ case ScalarizeVector:
+ case SplitVector:
+ break;
+ case WidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ if (WidenSize % InSize == 0) {
+    // Determine the new input vector type. The new input vector type will
+    // use the same element type (if the input is a vector) or the input type
+    // itself as the element type. It has the same total size as the type to
+    // widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+                                 WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+      // Only assemble the wider input when NewInVT is legal; building an
+      // illegal input vector could lead to it being repeatedly split and
+      // then widened without making progress.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector with undefined for the new nodes.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+  bool InputWidened = false; // Indicates we need to widen the inputs.
+ if (getTypeAction(InVT) != WidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+      // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+      if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
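+
+// For example, assuming v3i32 widens to v4i32 and v1i32 is not itself
+// widened, v3i32 = CONCAT_VECTORS v1i32, v1i32, v1i32 takes the first
+// branch above (4 % 1 == 0) and becomes a CONCAT_VECTORS of the three
+// operands plus one undef v1i32. When the inputs are widened and there are
+// exactly two of them, the concat becomes a single shuffle instead.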
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+    // The result and the input are different vector types, so widening the
+    // result could yield a legal type while widening the input could yield
+    // an illegal one, leading to the input being repeatedly split and then
+    // widened. To avoid this, widen the input only if doing so results in a
+    // legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i) {
+ Ops[i] = UndefVal;
+ }
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getConvertRndSat(EltVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ // Check if we can just return the input vector after widening.
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ }
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Idx.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal+i, IdxVT));
+ } else {
+ Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
+ for (i=1; i < NumElts; ++i) {
+ SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(i, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
+ }
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of loads.
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+    EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT,
+                                       WidenNumElts);
+ if (getTypeAction(CondVT) == WidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
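+
+// For example, widening a v2i32 shuffle with mask <1, 2> to v4i32: mask
+// element 1 indexes the first operand and is kept as is, mask element 2
+// indexes the second operand and is rebased to 2 - 2 + 4 = 4, and the two
+// new lanes are undef, giving <1, 4, -1, -1>.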
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non vector type");
+  EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely
+ // that we can fix the input to a legal type, so unroll the convert
+ // into some scalar code and create a nasty build vector.
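+ // For example (hypothetical types), an FP_TO_SINT whose v2f32 operand
+ // was widened to v4f32 while its v2i32 result is legal: extract elements
+ // 0 and 1, convert each scalar, and rebuild the result with BUILD_VECTOR.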
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
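+ // For example, an i64 result whose v2i32 operand was widened to v4i32:
+ // assuming v2i64 is legal, bitcast the v4i32 to v2i64 and extract
+ // element zero.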
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ if (InWidenSize % Size == 0 && !VT.isVector()) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenation with a nasty
+ // build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value, but we only want to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+// TLI: Target lowering used to determine legal types.
+// Width: Width left to load/store.
+// WidenVT: The widened vector type to load to/store from.
+// Align: If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we may load/store
+// beyond Width.
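+//
+// For example, with Width = 96 and WidenVT = v4i32, on a target where i64
+// and i32 are legal but v2i32 is not, this returns i64; a second query
+// with the remaining 32 bits then returns i32.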
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to
+ // load/store with.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same
+ // vector element type and whose size evenly divides the width of WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operators used to build the vector.
+// [Start,End): The range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ // Readjust the insert position based on the new load type.
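+ // e.g. moving from i32 loads to an i64 load halves the index: an
+ // insert index of 2 in i32 units becomes 1 in i64 units.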
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the
+ // same element type, or scalar loads, and then recombines the results
+ // into the widened vector type.
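+ // For example, a v3i32 load widened to v4i32, assuming v4i32 is legal
+ // but v2i32 is not and no extra alignment is available, becomes an i64
+ // load plus an i32 load that are recombined into the widened vector.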
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+ // Find a memory type we can use to load the vector.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ isVolatile, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the whole vector with a single load.
+ if (LdWidth <= NewVTWidth) {
+ if (NewVT.isVector()) {
+ if (NewVT != WidenVT) {
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ } else
+ return LdOp;
+ } else {
+ unsigned NumElts = WidenWidth / LdWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ }
+ }
+
+ // Load the vector with multiple loads, from the largest legal type down
+ // to scalar loads.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewVTWidth) {
+ // The type we are currently using is too large; find a better size.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ }
+
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
+ SVOffset+Offset, isVolatile,
+ MinAlign(Align, Increment));
+ LdChain.push_back(LdOp.getValue(1));
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (LdOps[0].getValueType().isVector()) {
+ // If the loads contain vectors, build the result using CONCAT_VECTORS.
+ // All of the vector loads are power-of-2 sized, and the scalar loads can
+ // be combined to make a power-of-2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+ } else
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+ } else // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the
+ // vector and then extend it. Instead, we unroll the load and build a
+ // new vector.
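+ // For example, an extending load of v2i8 whose result is widened to
+ // v4i32 becomes two i8-to-i32 extending loads at consecutive byte
+ // offsets, with the two remaining lanes filled with undef.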
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ SVOffset + Offset, LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the
+ // same element type, or scalar stores.
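+ // For example, storing v3i32 from a value widened to v4i32, on a target
+ // where i64 and i32 are legal but v2i32 is not: the value is bitcast to
+ // v2i64 and element 0 is stored as an i64, then element 2 of the original
+ // v4i32 is stored as an i32.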
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore the index to be relative to the original widened element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to truncate the
+ // vector and then store it. Instead, we extract each element and then
+ // store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widened vector type is bigger than the
+ // type we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we cannot play the trick of chopping up legal
+ // vector types and bitcasting to the right type. Instead, we unroll
+ // the store.
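+ // For example, a truncating store of a value widened to v4i32 with
+ // memory type v2i8 extracts elements 0 and 1 and emits an i8 truncating
+ // store for each.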
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset, StEltVT,
+ isVolatile, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+ SVOffset + Offset, StEltVT,
+ isVolatile, MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
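+/// For example, a v2i32 input with NVT v4i32 becomes a CONCAT_VECTORS of
+/// the input and an undef v2i32, while a v4i32 input with NVT v2i32 is
+/// handled with EXTRACT_SUBVECTOR.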
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened, so it might already have
+ // the right width, or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts == 0)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..ea716fd
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 0000000..f88b26d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode; the value roughly corresponds to the order of the node's
+/// original LLVM instruction. This is used when scheduling is turned off:
+/// we forgo the normal scheduling algorithms and output the instructions
+/// according to this ordering.
+class SDNodeOrdering {
+ DenseMap<const SDNode*, unsigned> OrderMap;
+
+ void operator=(const SDNodeOrdering&); // Do not implement.
+ SDNodeOrdering(const SDNodeOrdering&); // Do not implement.
+public:
+ SDNodeOrdering() {}
+
+ void add(const SDNode *Node, unsigned O) {
+ OrderMap[Node] = O;
+ }
+ void remove(const SDNode *Node) {
+ DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+ if (Itr != OrderMap.end())
+ OrderMap.erase(Itr);
+ }
+ void clear() {
+ OrderMap.clear();
+ }
+ unsigned getOrder(const SDNode *Node) {
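+ // Note that DenseMap::operator[] default-constructs an order of 0 for
+ // nodes that were never added.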
+ return OrderMap[Node];
+ }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..ad8630a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,604 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their definitions
+ /// that are "live". These nodes must be scheduled before any other nodes
+ /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ /// AddPred - Adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+ /// RemovePred - Removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getFlaggedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
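+ // The unfolded pair is the new load followed by the remaining operation.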
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ NewSU = NewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
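+ // The results of a machine node are its explicit defs followed by its
+ // implicit defs, so Reg's value index is the number of explicit defs
+ // plus Reg's position in the implicit def list.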
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ unsigned Reg = I->getReg();
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
+ if (RegAdded.insert(Reg))
+ LRegs.push_back(Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+ if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
+ if (RegAdded.insert(*Reg))
+ LRegs.push_back(*Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(*Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While the Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ // Try duplicating the nodes that produce these "expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross-class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross-copy register class is null, then it must be possible
+ // to copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..b92a672
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,269 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", "Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(MachineFunction &mf,
+ SchedulingPriorityQueue *availqueue,
+ ScheduleHazardRecognizer *HR)
+ : ScheduleDAGSDNodes(mf),
+ AvailableQueue(availqueue), HazardRec(HR) {
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SU, const SDep &D);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ AvailableQueue->initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, *I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While the Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
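+ // Erase by swapping with the back, then revisit the swapped-in entry.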
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGList(*IS->MF,
+ new LatencyPriorityQueue(),
+ IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..3f1766d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,1596 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ "Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their definitions
+ /// that are "live". These nodes must be scheduled before any other nodes
+ /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf,
+ bool isbottomup,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+ AvailableQueue(availqueue), Topo(SUnits) {
+ }
+
+ ~ScheduleDAGRRList() {
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+ /// AddPred - Adds a predecessor edge to SUnit SU.
+ /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+ /// RemovePred - Removes a predecessor edge from SUnit SU.
+ /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void CapturePred(SDep *PredEdge);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ void ScheduleNodeTopDown(SUnit*, unsigned);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleTopDown();
+ void ListScheduleBottomUp();
+
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = NewSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// ForceUnitLatencies - Return true, since register-pressure-reducing
+ /// scheduling doesn't need actual latency information.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue->push(PredSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
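+// To illustrate the bookkeeping above (with a hypothetical register R0):
+// if node D defines R0 and node U reads it through an assigned-reg
+// dependency, then when U is scheduled bottom-up LiveRegDefs[R0] is set to
+// D. DelayForLiveRegsBottomUp will then hold back any candidate that
+// clobbers R0 (or an alias) until D itself is scheduled and
+// ScheduleNodeBottomUp clears the entry.
+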
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
+/// its state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ AvailableQueue->UnscheduledNode(SU);
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()]) {
+ LiveRegDefs[I->getReg()] = SU;
+ ++NumLiveRegs;
+ }
+ if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+ LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ }
+ }
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ AvailableQueue->push(SU);
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
+/// BtCycle in order to schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
+ unsigned &CurCycle) {
+ SUnit *OldSU = NULL;
+ while (CurCycle > BtCycle) {
+ OldSU = Sequence.back();
+ Sequence.pop_back();
+ if (SU->isSucc(OldSU))
+ // Don't try to remove SU from AvailableQueue.
+ SU->isAvailable = false;
+ UnscheduleNodeBottomUp(OldSU);
+ --CurCycle;
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ ++NumBacktracks;
+}
+
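+// A sketch of the backtracking above: the nodes most recently appended to
+// Sequence are popped and unscheduled one at a time until CurCycle drops
+// back to BtCycle; UnscheduleNodeBottomUp returns them to the available
+// queue so that SU can be scheduled at the earlier cycle instead.
+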
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getFlaggedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getFlaggedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ ComputeLatency(LoadSU);
+ }
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+ ComputeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (isOperandOf(I->getSUnit(), LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from the old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
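+// To illustrate the unfolding path above (hypothetical target instruction):
+// a load-folded operation such as "r1 = add r2, [mem]" is split back into
+// "r3 = load [mem]; r1 = add r2, r3" so that the load and the arithmetic
+// half can be scheduled independently; the clone path below instead
+// duplicates the whole node when unfolding is not possible.
+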
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from the old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
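+// Sketch of the effect above: CopyFromSU copies the live value out of SrcRC
+// into the cross-class DestRC, CopyToSU copies it back into SrcRC, and the
+// already-scheduled successors are rerouted to read from CopyToSU -- freeing
+// the original physical register at the cost of two (possibly expensive)
+// cross-class copies.
+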
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
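+// Example of the value-number walk above, for a hypothetical instruction
+// with one explicit def and implicit defs {R1, R2}: the results are
+// numbered def0 = 0, R1 = 1, R2 = 2, so a query for R2 returns
+// N->getValueType(2).
+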
+/// CheckForLiveRegDef - Return true and update the live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
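+// The alias walk above matters because a def of a wide register also
+// clobbers values live in the registers it overlaps -- e.g. on x86 a def
+// of EAX conflicts with a live AX -- so the register and its entire alias
+// set are both checked.
+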
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep())
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
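+ // Each inline asm operand group begins with an immediate "flags" word.
+ // As decoded below, the low 3 bits give the operand kind (2 = register
+ // def, 6 = early-clobber register def) and the count of operands in the
+ // group is extracted as NumVals = (Flags & 0xffff) >> 3. Operand 0 is
+ // the chain and operand 1 is the asm string, so scanning starts at 2.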
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = (Flags & 0xffff) >> 3;
+
+ ++i; // Skip the ID value.
+ if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ SUnit *TrySU = NotReady[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ unsigned LiveCycle = CurCycle;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ unsigned LCycle = LiveRegCycles[Reg];
+ LiveCycle = std::min(LiveCycle, LCycle);
+ }
+ SUnit *OldSU = Sequence[LiveCycle];
+ if (!WillCreateCycle(TrySU, OldSU)) {
+ BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (OldSU->isAvailable) {
+ OldSU->isAvailable = false;
+ AvailableQueue->remove(OldSU);
+ }
+ AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ // If one or more successors have been unscheduled, then the current
+ // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable)
+ CurSU = AvailableQueue->pop();
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ NotReady.erase(NotReady.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try
+ // duplicating the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is null, then it must be possible to
+ // copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue->push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since this is a bottom-up schedule.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSU = AvailableQueue->pop();
+
+ if (CurSU)
+ ScheduleNodeTopDown(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+ template<class SF>
+ class RegReductionPriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+ bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+ td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
+ src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+} // end anonymous namespace
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
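+// A worked example of the numbering above: a node whose two data
+// predecessors both have number 1 (say, two leaf loads feeding an add)
+// takes the max (1) plus one for the tie, giving 2 -- one register is
+// needed to hold the first operand while the second is computed. With
+// predecessor numbers 2 and 1 the max alone is kept, since the cheaper
+// operand can be evaluated second without any extra register.
+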
+namespace {
+ template<class SF>
+ class RegReductionPriorityQueue : public SchedulingPriorityQueue {
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
+ unsigned currentQueueId;
+
+ protected:
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ public:
+ RegReductionPriorityQueue(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : Queue(SF(this)), currentQueueId(0),
+ TII(tii), TRI(tri), scheduleDAG(NULL) {}
+
+ void initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+ }
+
+ void addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. a store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ unsigned size() const { return Queue.size(); }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++currentQueueId;
+ Queue.push(U);
+ }
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ Queue.erase_one(SU);
+ SU->NodeQueueId = 0;
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<src_ls_rr_sort>
+ SrcRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+template <typename RRSort>
+static bool BURRSort(const SUnit *left, const SUnit *right,
+ const RegReductionPriorityQueue<RRSort> *SPQ) {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer the node with the lower non-zero order number; it comes earlier
+ // in the source and should come earlier in the final schedule. An order
+ // number of zero means no source ordering information is available.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+ return true;
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getFlaggedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
+ << " next to PredSU # " << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if the nodes aren't at roughly the same
+ // height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+ unsigned Limit) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ const SUnit *SuccSU = I->getSUnit();
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->getSUnit();
+ if (!PredSU->isScheduled)
+ if (++Sum > Limit)
+ return Sum;
+ }
+ }
+ return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+ bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
+ unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+ unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus != RPriority+RBonus)
+ return LPriority+LBonus < RPriority+RBonus;
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ if (left->NumSuccsLeft != right->NumSuccsLeft)
+ return left->NumSuccsLeft > right->NumSuccsLeft;
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, false, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..b51c61b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,435 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, the base class used by the
+// SelectionDAG-based scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf) {
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ DAG = dag;
+ ScheduleDAG::Run(bb, insertPos);
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = NewSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
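+ // A CopyToReg node's operands are (chain, destination register, value
+ // [, flag]); only a use at operand index 2 -- the copied value -- can
+ // create a physical register dependency on Def.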
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
+
+static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
+ SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ VTs.push_back(N->getValueType(i));
+ if (AddFlag)
+ VTs.push_back(MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+ if (Flag.getNode())
+ Ops.push_back(Flag);
+ SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
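+///
+/// For example (an illustrative sketch, not target-specific): loads of
+/// [base+0], [base+8], and [base+4] found on one chain are sorted by offset
+/// and glued through MVT::Flag so they are scheduled together in the order
+/// +0, +4, +8.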
+void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ if (!TID.mayLoad())
+ continue;
+
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ continue;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ Visited.clear();
+ Offsets.clear();
+ O2SMap.clear();
+ bool Cluster = false;
+ SDNode *Base = Node;
+ int64_t BaseOffset;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if their addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1) {
+ Base = User;
+ BaseOffset = Offset2;
+ } else {
+ BaseOffset = Offset1;
+ }
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ continue;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
+ NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ continue;
+
+ // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddFlags(Lead, SDValue(0,0), true, DAG);
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
+ for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
+ bool OutFlag = i < e-1;
+ SDNode *Load = Loads[i];
+ AddFlags(Load, InFlag, OutFlag, DAG);
+ if (OutFlag)
+ InFlag = SDValue(Load, Load->getNumValues()-1);
+ ++LoadsClustered;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+ // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+ // See if anything is flagged to this node; if so, add it to the set of
+ // flagged nodes. Nodes can have at most one flag input and one flag
+ // output. Flags are required to be the last operand and result of a node.
+
+ // Scan up to find flagged preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ }
+
+ // Scan down to find any flagged succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+ SDValue FlagVal(N, N->getNumValues()-1);
+
+ // The Flag result has either zero or one user.
+ bool HasFlagUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (FlagVal.isOperandOf(*UI)) {
+ HasFlagUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ break;
+ }
+ if (!HasFlagUse) break;
+ }
+
+ // If there are flag operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are flagged together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ if (UnitLatencies)
+ NodeSUnit->Latency = 1;
+ else
+ ComputeLatency(NodeSUnit);
+ }
+}
+
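+// An example of the flag grouping above: a compare whose MVT::Flag result
+// is consumed by a conditional branch ends up in one SUnit with the branch,
+// and the SUnit's representative node (setNode above) is the bottom-most
+// node of the flagged sequence -- here, the branch.
+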
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross-class copy (cost < 0). That means we are only
+ // treating an "expensive to copy" register dependency as a physical
+ // register dependency. This may change in the future though.
+ if (Cost >= 0)
+ PhysReg = 0;
+
+ const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpSU->Latency, PhysReg);
+ if (!isChain && !UnitLatencies) {
+ ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+ ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+ }
+
+ SU->addPred(dep);
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection DAG that we
+/// are given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// flagged-together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ if (N->isMachineOpcode()) {
+ SU->Latency += InstrItins.
+ getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+ }
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ dbgs() << " ";
+ FlaggedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ FlaggedNodes.pop_back();
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any flagged SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+ N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap, EM);
+ FlaggedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap, EM);
+ }
+
+ BB = Emitter.getBlock();
+ InsertPos = Emitter.getInsertPos();
+ return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000..6b829b6
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,121 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
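+ /// For example, a call and the CopyToReg nodes that set up its argument
+ /// registers are flagged together into one SUnit, so the scheduler cannot
+ /// separate them.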
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ SelectionDAG *DAG; // DAG of the current basic block
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken) return true;
+ return false;
+ }
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessor/successor info nor the temporary scheduling state.
+ ///
+ SUnit *Clone(SUnit *N);
+
+ /// BuildSchedGraph - Build the SUnit graph from the selection DAG that we
+ /// are given as input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// flagged-together nodes with a single SUnit.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ virtual MachineBasicBlock *
+ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..6122a2a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6325 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeOrdering.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // PPC long double cannot be converted to any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDValue NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+ bitcastToAPInt().isAllOnesValue())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements.
+ SDValue Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one 0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
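+/// For example, the swapped form of SETLT (X < Y) is SETGT (Y > X); SETEQ and
+/// SETNE are unchanged, since equality does not care about operand order.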
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
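+/// For example, the integer inverse of SETLT is SETGE, while the FP inverse
+/// of SETOLT is SETUGE, since the unordered bit is flipped as well.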
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
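+/// Callers OR two of these results together, so a combined value of 3 means
+/// one comparison was signed and the other unsigned, which cannot be folded.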
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+ return MF->getTarget();
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+ ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+ // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, indexing mode, and
+/// extension/truncation information.
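+/// The resulting layout is: bits [1:0] = extension/truncation type,
+/// bits [4:2] = indexing mode, bit [5] = volatile flag.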
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+ DAGUpdateListener *UpdateListener) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if it has no uses left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+ RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove it from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ Ordering->remove(N);
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should not be in CSEMaps!");
+ return false;
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+ DAGUpdateListener *UpdateListener) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+ // N is now dead. Inform the listener if it exists and delete it.
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it in place. Inform the
+ // listener, if one exists.
+ if (UpdateListener)
+ UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
+void SelectionDAG::VerifyNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ break;
+ }
+ }
+}
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ const Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
+ : TLI(tli), FLI(fli), DW(0),
+ EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
+ getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0) {
+ AllNodes.push_back(&EntryNode);
+ Ordering = new SDNodeOrdering();
+}
+
+void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
+ DwarfWriter *dw) {
+ MF = &mf;
+ MMI = mmi;
+ DW = dw;
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ allnodes_clear();
+ delete Ordering;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+ Ordering = new SDNodeOrdering();
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
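+/// getZeroExtendInReg - Return Op zero-extended in-register from the smaller
+/// type VT: the low VT.getSizeInBits() bits of Op are kept and all higher
+/// bits are cleared. For example, for VT == i8 this produces (AND Op, 255).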
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantSDNode>();
+ new (N) ConstantSDNode(isT, &Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantFPSDNode>();
+ new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else
+ return getConstantFP(APFloat(Val), VT, isTarget);
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+ EVT VT, int64_t Offset,
+ bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
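+ // (The shift pair below sign-extends from bit BitWidth-1; e.g. on a 32-bit
+ // target an offset of 0xFFFFFFFF becomes -1.)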
+ EVT PTy = TLI.getPointerTy();
+ unsigned BitWidth = PTy.getSizeInBits();
+ if (BitWidth < 64)
+ Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
+ new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
+ new (N) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
+ new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+ new (N) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<VTSDNode>();
+ new (N) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+ new (N) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
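+// For example, with 4-element vectors, the mask <0,5,2,7> becomes <4,1,6,3>,
+// selecting the same lanes from the swapped operands.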
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Vector Shuffle VTs must be vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+ // Canonicalize: if all indices point into the lhs, -> shuffle lhs, undef.
+ // Canonicalize: if all indices point into the rhs, -> shuffle rhs, undef.
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+ // If this is an identity shuffle, return N1; if every element shuffles to
+ // undef, return an UNDEF node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the OperandAllocator is reset.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+ new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+ new (N) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+ SDValue Root,
+ unsigned LabelID) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddInteger(LabelID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+ new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(BA);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
+ new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || isa<PointerType>(V->getType())) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+ new (N) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ MVT ShTy = TLI.getShiftAmountTy();
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ const Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ const Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const TargetData *TD = TLI.getTargetData();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128)
+ return SDValue();
+
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
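+
+// A folding sketch (DAG and dl assumed to be in scope; values illustrative):
+//   SDValue Three = DAG.getConstant(3, MVT::i32);
+//   SDValue Five  = DAG.getConstant(5, MVT::i32);
+//   // Both operands are ConstantSDNodes and C1.ult(C2) holds, so this
+//   // returns getConstant(1, MVT::i1) via the integer switch above.
+//   SDValue F = DAG.FoldSetCC(MVT::i1, Three, Five, ISD::SETULT, dl);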
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) const {
+ unsigned BitWidth = Mask.getBitWidth();
+ assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
+ "Mask size mismatches value type size!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output bits are known to be zero if known zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ return;
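+ // Worked example for the AND case above (8-bit, illustrative values):
+ //   LHS: KnownZero = 11110000, KnownOne = 00000001
+ //   RHS: KnownZero = 00001100, KnownOne = 00000011
+ //   AND: KnownOne  = 00000001, KnownZero = 11111100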
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output bits are known to be one if known one in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output bits are known one where one side is known set and the other known clear.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clear();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
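+ // Worked example for the MUL case above: if the LHS is known to be a
+ // multiple of 4 (two trailing zero bits) and the RHS a multiple of 2
+ // (one trailing zero bit), the product is a multiple of 8, so TrailZ = 3
+ // low bits are known zero. This is the alignment computation mentioned
+ // above.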
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clear();
+ KnownZero2.clear();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ APInt InDemandedMask = (Mask << ShAmt);
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ if (HighBits.getBoolValue())
+ InDemandedMask |= APInt::getSignBit(BitWidth);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
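+ // Worked example for the SRA case above (8-bit, ShAmt = 2): the two
+ // vacated high bits are copies of the old sign bit, so if the sign bit
+ // was known zero they become known zero, if known one they become known
+ // one, and otherwise nothing new is learned about them.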
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clear();
+ return;
+ }
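+ // For example, with BitWidth = 32 the result of CTTZ/CTLZ/CTPOP is at
+ // most 32, which fits in LowBits = Log2_32(32) + 1 = 6 bits, so the
+ // high 26 bits are known zero.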
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded. Temporarily set this bit in the mask for our callee.
+ if (NewBits.getBoolValue())
+ InMask |= InSignBit;
+
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ // If the sign bit wasn't actually demanded by our caller, we don't
+ // want it set in the KnownZero and KnownOne result values. Reset the
+ // mask and reapply it to the result values.
+ InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero &= InMask;
+ KnownOne &= InMask;
+
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask;
+ InMask.zext(InBits);
+ KnownZero.zext(InBits);
+ KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth here because the constant is non-negative.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD: {
+ // The output has at least as many low known-0 bits as are clear in
+ // both the LHS and the RHS. For example, 8+(X<<3) is known to have the
+ // low 3 bits clear.
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clear();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
+ }
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
+ }
+ return;
+ }
+}
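+
+// A small usage sketch for the analysis above (Op and DAG assumed to be in
+// scope; this mirrors how MaskedValueIsZero consumes the results):
+//   APInt KnownZero, KnownOne;
+//   APInt Mask = APInt::getAllOnesValue(32);
+//   DAG.ComputeMaskedBits(Op, Mask, KnownZero, KnownOne);
+//   // Low two bits provably zero, i.e. Op is a multiple of 4?
+//   bool Low2Clear = (KnownZero & APInt(32, 3)) == APInt(32, 3);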
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ // If negative, return # leading ones.
+ if (Val.isNegative())
+ return Val.countLeadingOnes();
+
+ // Return # leading zeros.
+ return Val.countLeadingZeros();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents() ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16 sextload to i32: 32-16+1 = '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // e.g. i16 zextload to i32: 32-16 = '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Mask was zero before
+ // shifting; we don't want to return '64' for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
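+
+// Worked example: for an i32 "(sra X, 2)" the SRA case above returns at
+// least 1 + 2 = 3 -- the top three bits all equal the sign bit. For the
+// constant i32 -1 the Constant case returns 32, since every bit is a copy
+// of the sign bit.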
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (FiniteOnlyFPMath())
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ if (!GA) return false;
+ if (GA->getOffset() != 0) return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ if (!GV) return false;
+ MachineModuleInfo *MMI = getMachineModuleInfo();
+ return MMI && MMI->hasDebugInfo();
+}
+
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
+ unsigned i) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getMaskElt(i) < 0)
+ return getUNDEF(VT.getVectorElementType());
+ unsigned Index = N->getMaskElt(i);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ Index %= NumElems;
+
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ EVT VVT = V.getValueType();
+ if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
+ return SDValue();
+ }
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : getUNDEF(VT.getVectorElementType());
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+ if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
+ return getShuffleScalarElt(SVN, Index);
+ return SDValue();
+}
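+
+// Example: for a <4 x i32> shuffle of (A, B) with mask <0, 5, 2, -1>,
+// element 1 has Index = 5 >= NumElems = 4, so it is taken from operand B
+// at position 5 % 4 = 1, while the -1 mask element yields UNDEF.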
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ unsigned BitWidth = VT.getSizeInBits();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ const uint64_t zero[] = {0, 0};
+ // No compile time operations on this type.
+ if (VT==MVT::ppcf128)
+ break;
+ APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(Val.bitsToFloat(), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(Val.bitsToDouble(), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), 2, x);
+ return getConstant(api, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else
+ return Operand.getNode()->getOperand(0);
+ }
+ break;
+ case ISD::BIT_CONVERT:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BIT_CONVERT between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
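+
+// Constant-folding sketch for the unary path above (illustrative values;
+// DAG and dl assumed to be in scope):
+//   SDValue C = DAG.getConstant(0x1234, MVT::i32);
+//   // Folds via the ConstantSDNode switch: zextOrTrunc(8) gives 0x34.
+//   SDValue T = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, C);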
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+ EVT VT,
+ ConstantSDNode *Cst1,
+ ConstantSDNode *Cst2) {
+ const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+ break;
+ case ISD::UREM:
+ if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+ break;
+ case ISD::SDIV:
+ if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+ break;
+ case ISD::SREM:
+ if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+ break;
+ case ISD::AND: return getConstant(C1 & C2, VT);
+ case ISD::OR: return getConstant(C1 | C2, VT);
+ case ISD::XOR: return getConstant(C1 ^ C2, VT);
+ case ISD::SHL: return getConstant(C1 << C2, VT);
+ case ISD::SRL: return getConstant(C1.lshr(C2), VT);
+ case ISD::SRA: return getConstant(C1.ashr(C2), VT);
+ case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+ case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+ default: break;
+ }
+
+ return SDValue();
+}
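+
+// Note that the division and remainder cases above fold only when C2 is
+// non-zero; for a zero divisor this returns the null SDValue and the
+// caller emits a regular node instead of folding.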
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTORS whose operands are all BUILD_VECTORs can be
+ // simplified to one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ // fall through
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ }
+ }
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ EVT VEltTy = N1.getValueType().getVectorElementType();
+ if (Elt.getValueType() != VEltTy) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+ }
+ if (VT != VEltTy) {
+ // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+ // result is implicitly extended.
+ Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+ }
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element. If the
+ // indices are known to be different, extract the element from the
+ // original vector.
+ if (N1.getOperand(2) == N2) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ } else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+ isa<ConstantSDNode>(N2))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ if (N1.getValueType() == VT) // Trivial extraction.
+ return N1;
+ break;
+ }
+
+ if (N1C) {
+ if (N2C) {
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+ if (SV.getNode()) return SV;
+ } else { // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP && VT != MVT::ppcf128) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+ }
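+ // For example, an FADD of f64 constants 1.5 and 2.25 folds above to
+ // getConstantFP(3.75, MVT::f64); only results with status opInvalidOp
+ // (e.g. +inf plus -inf) are left unfolded.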
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTORS whose operands are all BUILD_VECTORs can be
+ // simplified to one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::BRCOND:
+ if (N2C) {
+ if (N2C->getZExtValue()) // Unconditional branch
+ return getNode(ISD::BR, DL, MVT::Other, N1, N3);
+ else
+ return N1; // Never-taken branch
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ break;
+ case ISD::BIT_CONVERT:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
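+
+// Illustrative usage (a sketch of a hypothetical target's LowerCall hook):
+// thread the chain through this helper so that loads of incoming stack
+// arguments are forced to complete before the outgoing call sequence can
+// overwrite their slots:
+//   Chain = DAG.getStackArgumentTokenFactor(Chain);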
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned NumBits = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Value = DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::SHL, dl, VT, Value,
+ DAG.getConstant(Shift,
+ TLI.getShiftAmountTy())),
+ Value);
+ Shift <<= 1;
+ }
+
+ return Value;
+}
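+
+// Worked example (illustrative): for VT = MVT::i32 and a constant memset
+// byte of 0xAB, the replication loop above doubles the filled width on each
+// iteration:
+//   0x000000AB -> 0x0000ABAB -> 0xABABABAB
+// and the result is returned as a single 32-bit constant.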
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getConstant(0,
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumBits = VT.getSizeInBits();
+ unsigned MSB = NumBits / 8;
+ uint64_t Val = 0;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
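+
+// Worked example (illustrative): with Str = "abcd", VT = MVT::i32, and a
+// little-endian target, Offset is first advanced to the last byte, so the
+// loop above assembles Val = 0x64636261 ('d','c','b','a'); stored as a
+// little-endian i32 this reproduces the bytes "abcd" in memory order.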
+
+/// getMemBasePlusOffset - Returns a node representing Base plus the given
+/// byte Offset.
+///
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+ return true;
+
+ return false;
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
+ SDValue Dst, SDValue Src,
+ unsigned Limit, uint64_t Size, unsigned &Align,
+ std::string &Str, bool &isSrcStr,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ isSrcStr = isMemSrcFromString(Src, Str);
+ bool isSrcConst = isa<ConstantSDNode>(Src);
+ EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
+ bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
+ if (VT != MVT::iAny) {
+ const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ // If source is a string constant, this will require an unaligned load.
+ if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+ if (Dst.getOpcode() != ISD::FrameIndex) {
+      // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (MFI->isFixedObjectIndex(FI)) {
+        // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI) < NewAlign)
+ MFI->setObjectAlignment(FI, NewAlign);
+ Align = NewAlign;
+ }
+ }
+ }
+ }
+
+ if (VT == MVT::iAny) {
+ if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+      // For now, only use non-vector loads/stores for the left-over pieces.
+ if (VT.isVector()) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize = VT.getSizeInBits() / 8;
+ } else {
+ // This can result in a type that is not legal on the target, e.g.
+ // 1 or 2 bytes on PPC.
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize >>= 1;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
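+
+// Worked example (illustrative): for Size = 15 with a chosen VT of MVT::i64
+// and a generous Limit, the sizing loop above emits one i64 (8 bytes) and
+// then repeatedly halves VT for the remainder, yielding
+//   MemOps = { i64, i32, i16, i8 }
+// which covers 8 + 4 + 2 + 1 = 15 bytes in four operations.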
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<EVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ bool isZeroStr = CopyFromStr && Str.empty();
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+      // It's unlikely a store of a vector immediate can be done in a
+      // single instruction. It would require a load from a constant pool
+      // first. We also handle the case of storing an all-zero vector.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ } else {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+      // FIXME: Does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+ Store = DAG.getTruncStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, VT, false, DstAlign);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<EVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemmove();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memset to a series of store ops if the size operand
+  // falls below a certain threshold.
+ std::vector<EVT> MemOps;
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+ Size, Align, Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ AlwaysInline,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, true,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ }
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
+ Args, *this, dl, GetOrdering(Chain.getNode()));
+ return CallResult.second;
+}
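+
+// Illustrative usage (a sketch; assumes values for Chain, Dst, Src, Size,
+// DstSV, and SrcSV in scope): a caller lowering llvm.memcpy just asks for
+// the best strategy and lets the three tiers above (inline load/store
+// expansion, target-specific code, libcall) decide:
+//   Chain = DAG.getMemcpy(Chain, dl, Dst, Src, Size, /*Align=*/4,
+//                         /*AlwaysInline=*/false, DstSV, 0, SrcSV, 0);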
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl, GetOrdering(Chain.getNode()));
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl, GetOrdering(Chain.getNode()));
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, const Value* PtrVal,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index.
+ if (!PtrVal)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ EVT VT = Cmp.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
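+
+// Illustrative usage (hypothetical values): lowering a 32-bit compare-and-
+// swap produces a two-result node; result 0 is the value loaded from memory
+// and result 1 is the output chain:
+//   SDValue CAS = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32, Chain,
+//                               Ptr, Cmp, New, PtrVal, /*Alignment=*/4);
+//   SDValue Loaded = CAS.getValue(0), OutChain = CAS.getValue(1);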
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+  // Check if the memory reference refers to a frame index.
+ if (!PtrVal)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// A single operand is returned as-is rather than wrapped in a node.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, srcValue, SVOff, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(srcValue, Flags, SVOff,
+ MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+ ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ const Value *SV, int SVOffset, EVT MemVT,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+  // Check if the memory reference refers to a frame index.
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset,
+ MemVT.getStoreSize(), Alignment);
+ return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+ ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+    assert(VT.isInteger() == MemVT.isInteger() &&
+           "Cannot convert from FP to Int or Int to FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an extending load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an extending load to change the number of "
+           "vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
+ new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+ SV, SVOffset, VT, isVolatile, Alignment);
+}
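+
+// Illustrative usage (assumes Chain, Ptr, and a source Value *SV in scope):
+// a plain, naturally aligned 32-bit load:
+//   SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr, SV, /*SVOffset=*/0,
+//                           /*isVolatile=*/false, /*Alignment=*/4);
+// L.getValue(0) is the loaded value and L.getValue(1) the new chain.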
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV,
+ int SVOffset, EVT MemVT,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+ SV, SVOffset, MemVT, isVolatile, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+ return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+ LD->getChain(), Base, Offset, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+  // Check if the memory reference refers to a frame index.
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset,
+ Val.getValueType().getStoreSize(), Alignment);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV,
+ int SVOffset, EVT SVT,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+  // Check if the memory reference refers to a frame index.
+ if (!SV)
+ if (const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
+ SV = PseudoSourceValue::getFixedStack(FI->getIndex());
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
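+
+// Illustrative usage (assumes an i32 value Val and a pointer Ptr in scope):
+// storing only the low 16 bits of Val:
+//   SDValue ST = DAG.getTruncStore(Chain, dl, Val, Ptr, SV, /*SVOffset=*/0,
+//                                  MVT::i16, /*isVolatile=*/false,
+//                                  /*Alignment=*/2);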
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV) {
+ SDValue Ops[] = { Chain, Ptr, SV };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot effect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
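+
+// Because the EVT arrays are interned in VTList, repeated queries for the
+// same pair return an SDVTList backed by the same storage (illustrative):
+//   SDVTList A = DAG.getVTList(MVT::i32, MVT::Other);
+//   SDVTList B = DAG.getVTList(MVT::i32, MVT::Other);
+//   assert(A.VTs == B.VTs && "identical lists share one array");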
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: llvm_unreachable("Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
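+
+// Illustrative behavior (not part of this checkin): if mutating N's operands
+// makes it identical to a node the DAG already contains, the existing node
+// is returned and N is left untouched, so callers must use the result:
+//   SDValue Res = DAG.UpdateNodeOperands(SDValue(N, 0), NewOp0, NewOp1);
+//   // Res may or may not be SDValue(N, 0).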
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->OperandsNeedDelete = true;
+ } else
+ N->InitOperands(N->OperandList, Ops, NumOps);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
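+ // A node whose last value type is Flag produces glue, which ties it to a
+ // specific neighboring node; such nodes must not be shared through CSE.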
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ MachineSDNode *N;
+ void *IP;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return cast<MachineSDNode>(E);
+ }
+
+ // Allocate a new MachineSDNode.
+ N = NodeAllocator.Allocate<MachineSDNode>();
+ new (N) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return N;
+}
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
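+///
+/// For example (an illustrative sketch; SubRegIdx, dl and Val64 stand for a
+/// target-specific subregister index, a debug location and a 64-bit value):
+///
+/// SDValue Lo = DAG.getTargetExtractSubreg(SubRegIdx, dl, MVT::i32, Val64);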
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+ DAGUpdateListener *UpdateListener) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDValue *To,
+ DAGUpdateListener *UpdateListener) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. Node deletions
+/// and updates are reported through the optional UpdateListener, as for
+/// ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+ DAGUpdateListener *UpdateListener){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, UpdateListener);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
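+ /// Records with the same User sort together, so the caller can remove each
+ /// user from the CSE maps just once, no matter how many of its operands
+ /// are being replaced.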
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. Node deletions and updates are
+/// reported through the optional UpdateListener, as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num,
+ DAGUpdateListener *UpdateListener){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+ // Record all the existing uses up front so that uses introduced during
+ // the replacement process are not visited.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the number of nodes assigned
+/// and leaves the AllNodes list itself in topologically-sorted order.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
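+ //
+ // This is the standard worklist topological sort (Kahn's algorithm): a
+ // node is emitted as soon as all of its operands have been emitted,
+ // starting from operand-less nodes such as the entry token.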
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ checkForCycles(N);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands; add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ checkForCycles(N);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (I == SortedPos) {
+#ifndef NDEBUG
+ SDNode *S = ++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+ assert(SD && "Trying to assign an order to a null node!");
+ Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+ assert(SD && "Trying to get the order of a null node!");
+ return Ordering->getOrder(SD);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+ EVT VT, int64_t o, unsigned char TF)
+ : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
+ Offset(o), TargetFlags(TF) {
+ TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
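+ // Extended value types live in a shared set that may be populated from
+ // multiple threads, so insertion is guarded by a mutex. Simple value
+ // types below come from a statically-built table and need no locking.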
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there is any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions. In practice, this looks through token
+/// factors and non-volatile loads. In order to remain efficient, this only
+/// looks a few nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF reach dest, then we can do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return true;
+ return false;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
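+
+// A typical use (an illustrative sketch, not part of this change): before
+// folding two chained memory operations, a combine might check
+//
+// if (StoreChain.reachesChainWithoutSideEffects(SDValue(Load, 1)))
+//
+// where result 1 of a LoadSDNode is its output chain.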
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by traversing up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessorOf(SDNode *N) const {
+ SmallPtrSet<SDNode *, 32> Visited;
+ SmallVector<SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ do {
+ N = Worklist.pop_back_val();
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Op = N->getOperand(i).getNode();
+ if (Op == this)
+ return true;
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ }
+ } while (!Worklist.empty());
+
+ return false;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->get(getMachineOpcode()).getName();
+ return "<<Unknown Machine Node>>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node>>";
+ }
+ return "<<Unknown Node>>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE:
+ return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP:return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FPOWI: return "fpowi";
+ case ISD::FPOW: return "fpow";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+
+ case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BIT_CONVERT: return "bit_convert";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Trampolines
+ case ISD::TRAMPOLINE: return "trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+ std::string S = "< ";
+
+ if (isZExt())
+ S += "zext ";
+ if (isSExt())
+ S += "sext ";
+ if (isInReg())
+ S += "inreg ";
+ if (isSRet())
+ S += "sret ";
+ if (isByVal())
+ S += "byval ";
+ if (isNest())
+ S += "nest ";
+ if (getByValAlign())
+ S += "byval-align:" + utostr(getByValAlign()) + " ";
+ if (getOrigAlign())
+ S += "orig-align:" + utostr(getOrigAlign()) + " ";
+ if (getByValSize())
+ S += "byval-size:" + utostr(getByValSize()) + " ";
+ return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ if (G && R->getReg() &&
+ TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ OS << " %reg" << R->getReg();
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 100);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 100);
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+
+ dbgs() << "\n";
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
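+/// UnrollVectorOp - Unroll the given vector operation into scalar operations
+/// on its elements and return the results as a BUILD_VECTOR. If ResNE is
+/// nonzero, the result is forced to ResNE elements, with any extra elements
+/// filled with UNDEF.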
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, MVT::i32));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
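+/// For example (illustrative), an i32 load from FrameIndex offset 4 is
+/// consecutive with a base i32 load from offset 0 when Bytes == 4 and
+/// Dist == 1.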
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+ if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+ if (V && (V->getSExtValue() == Dist*Bytes))
+ return true;
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
+ return MinAlign(GV->getAlignment(), GVOffset);
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ // FIXME: Handle FI+CST.
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ if (MFI.isFixedObjectIndex(FrameIdx)) {
+ int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MinAlign(ObjectOffset, StackAlign);
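+ // For example, with a 16-byte aligned stack, an object at offset 36
+ // gets MinAlign(36, 16) == 4.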
+
+ // Finally, the frame object itself may have a known alignment. Factor
+ // the alignment + offset into a new alignment. For example, if we know
+ // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+ // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
+ // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+ return std::max(Align, FIInfoAlign);
+ }
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS << std::string(indent, ' ');
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos);
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
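+ // For example, the 32-bit constant 0x01010101 keeps matching halves at
+ // each step (0x0101 == 0x0101, then 0x01 == 0x01), so the loop below
+ // stops with SplatBitSize == 8.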
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = APInt(SplatValue).trunc(HalfSize);
+ APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+static void checkForCyclesHelper(const SDNode *N,
+ std::set<const SDNode *> &visited) {
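+ // 'visited' holds the nodes on the current DFS path (each call erases its
+ // node on the way out), so meeting a node that is already present means
+ // the graph contains a cycle rather than an ordinary diamond.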
+ if (visited.find(N) != visited.end()) {
+ dbgs() << "Offending node:\n";
+ N->dumprFull();
+ assert(0 && "Detected cycle in SelectionDAG");
+ }
+
+ std::set<const SDNode*>::iterator i;
+ bool inserted;
+
+ tie(i, inserted) = visited.insert(N);
+ assert(inserted && "Missed cycle");
+
+ for (unsigned Op = 0; Op < N->getNumOperands(); ++Op) {
+ checkForCyclesHelper(N->getOperand(Op).getNode(), visited);
+ }
+ visited.erase(i);
+}
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+ assert(N && "Checking nonexistent SDNode");
+ std::set<const SDNode *> visited;
+ checkForCyclesHelper(N, visited);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+ checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000..de17f90
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6164 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
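+// For example, passing -limit-float-precision=12 to llc (or to any tool
+// that links in this code) requests the 12-bit inline approximations.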
+
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
+ ///
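+ /// For example (an illustrative sketch), an i64 value on a target whose
+ /// widest legal integer type is i32 is described by ValueVTs = { i64 },
+ /// RegVTs = { i32 }, and two entries in Regs.
+ ///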
+ struct RegsForValue {
+ /// TLI - The TargetLowering object.
+ ///
+ const TargetLowering *TLI;
+
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; with physical registers, however, it
+ /// is necessary to have a separate record of the types.
+ ///
+ SmallVector<EVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() : TLI(0) {}
+
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> ®s,
+ EVT regvt, EVT valuevt)
+ : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> ®s,
+ const SmallVector<EVT, 4> ®vts,
+ const SmallVector<EVT, 4> &valuevts)
+ : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) : TLI(&tli) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal() {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT RegisterVT = RegVTs[Value];
+ if (!TLI->isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ TLI = RHS.TLI;
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+    /// this value and return the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+ SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
+    /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ unsigned Order, SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Code,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG, unsigned Order,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
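+/// For example, two i32 parts can be assembled into a single i64 value, or
+/// four f32 parts into a v4f32 vector.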
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+ const SDValue *Parts,
+ unsigned NumParts, EVT PartVT, EVT ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
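+      // (NumParts & (NumParts - 1)) is nonzero exactly when NumParts is not
+      // a power of 2; in that case round down to the largest power of 2 and
+      // assemble the remainder as the trailing "odd" part below.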
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2,
+ PartVT, HalfVT);
+ Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT);
+ } else {
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ }
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
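+        // For example, with NumParts == 3 the two round parts form the low
+        // bits and the single odd part supplies the remaining high bits
+        // (modulo endianness).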
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, dl, Order,
+ Parts + RoundParts, OddParts, PartVT, OddVT);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+ }
+ } else if (ValueVT.isVector()) {
+ // Handle a multi-element vector.
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts
+ && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT
+ && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
+ ValueVT, &Ops[0], NumIntermediates);
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ assert(ValueVT.isVector() && "Unknown vector conversion!");
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ }
+
+ if (ValueVT.isVector()) {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
+ }
+
+ if (PartVT.isInteger() &&
+ ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ }
+ }
+
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ if (ValueVT.bitsLT(Val.getValueType())) {
+ // FP_ROUND's are always exact here.
+ return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ DAG.getIntPtrConstant(1));
+ }
+
+ return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ }
+
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+
+ llvm_unreachable("Unknown mismatch!");
+ return SDValue();
+}
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
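+/// This is the inverse of getCopyFromParts: for example, an i64 value may be
+/// split into two i32 parts on a target where only i32 is legal.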
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT ValueVT = Val.getValueType();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (!NumParts)
+ return;
+
+ if (!ValueVT.isVector()) {
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
+ } else if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
+ } else {
+ llvm_unreachable("Unknown mismatch!");
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+      // If the parts cover fewer bits than the value has, truncate the value.
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ llvm_unreachable("Unknown mismatch!");
+ }
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
+ DAG.getConstant(RoundBits,
+ TLI.getPointerTy()));
+ getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts,
+ OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(1, PtrVT));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(0, PtrVT));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+
+ return;
+ }
+
+ // Vector ValueVT.
+ if (NumParts == 1) {
+ if (PartVT != ValueVT) {
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ PartVT, Val,
+ DAG.getConstant(0, PtrVT));
+ }
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT, NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates),
+ PtrVT));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i, PtrVT));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each value into
+    // legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ }
+}
+
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+ AA = &aa;
+ GFI = gfi;
+ TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ EdgeMapping.clear();
+ DAG.clear();
+ CurDebugLoc = DebugLoc::getUnknownLoc();
+ HasTailCall = false;
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
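+/// AssignOrderingToNode - Assign the current SDNodeOrder to the given node
+/// and, recursively, to any of its operands that do not yet have an ordering.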
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+ if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+ DAG.AssignOrdering(Node, SDNodeOrder);
+
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+ AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(Instruction &I) {
+ visit(I.getOpcode(), I);
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+#include "llvm/Instruction.def"
+ }
+
+ // Assign the ordering to the freshly created DAG nodes.
+ if (NodeMap.count(&I)) {
+ ++SDNodeOrder;
+ AssignOrderingToNode(getValue(&I).getNode());
+ }
+}
+
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ EVT VT = TLI.getValueType(V->getType(), true);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return N = DAG.getConstant(*CI, VT);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return N = DAG.getGlobalAddress(GV, VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return N = DAG.getConstant(0, TLI.getPointerTy());
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return N = DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return N = DAG.getUNDEF(VT);
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the ValueMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+
+ return DAG.getMergeValues(&Constants[0], NumElts,
+ getCurDebugLoc());
+ }
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ const VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ unsigned InReg = FuncInfo.ValueMap[V];
+ assert(InReg && "Value not in map!");
+
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDNodeOrder, Chain, NULL);
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+static void getReturnInfo(const Type* ReturnType,
+ Attributes attr, SmallVectorImpl<EVT> &OutVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
+ TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets = 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+ unsigned Offset = 0;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+ PartVT.getTypeForEVT(ReturnType->getContext()));
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ OutVTs.push_back(PartVT);
+ OutFlags.push_back(Flags);
+    if (Offsets) {
+      Offsets->push_back(Offset);
+      Offset += PartSize;
+    }
+ }
+ }
+}
+
+void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ if (!FLI.CanLowerReturn) {
+ unsigned DemoteReg = FLI.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = PtrValueVTs[0];
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ Chains[i] =
+ DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ Add, NULL, Offsets[i], false, 0);
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted
+ // to at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder,
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (F->paramHasAttr(0, Attribute::SExt))
+ Flags.setSExt();
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
+ }
+ }
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
+ if (!V->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
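+  // FOC drops the ordered/unordered distinction and is used when
+  // FiniteOnlyFPMath() is in effect; FPC preserves the full semantics.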
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (FiniteOnlyFPMath())
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == CurMBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ llvm_unreachable("Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively decompose the and/or tree, emitting a branch for each leaf
+/// condition rather than materializing the boolean value with setcc's.
+void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(BranchInst &I) {
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ CurMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0]);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, Succ0MBB, Succ1MBB, CurMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
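+    // X is in [Low, High] iff (X - Low) <= (High - Low) as an unsigned
+    // comparison. When Low is the minimum signed value, Low <= X always
+    // holds, so the subtraction can be skipped.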
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ CurMBB->addSuccessor(CB.TrueBB);
+ CurMBB->addSuccessor(CB.FalseBB);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // If the branch was constant folded, fix up the CFG.
+ if (BrCond.getOpcode() == ISD::BR) {
+ CurMBB->removeSuccessor(CB.FalseBB);
+ } else {
+ // Otherwise, go ahead and insert the false branch.
+ if (BrCond == getControlRoot())
+ CurMBB->removeSuccessor(CB.TrueBB);
+
+ if (CB.FalseBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+ }
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit the JumpTable node in the current MBB.
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce
+/// the jump table index from the switch case value.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH) {
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // may therefore require extension or truncation.
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+ unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()), Sub,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
+ TLI.getPointerTy());
+
+ B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, ShiftOp);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ CurMBB->addSuccessor(B.Default);
+ CurMBB->addSuccessor(MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB != NextBlock)
+    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - This function produces one "bit test" branch.
+void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B) {
+ // Make desired shift
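+  // Reg holds the switch value minus the range minimum (set up by
+  // visitBitTestHeader); SwitchVal = 1 << Reg selects the bit to test.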
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+ TLI.getPointerTy());
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ TLI.getPointerTy(), SwitchVal,
+ DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(AndOp.getValueType()),
+ AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+
+ CurMBB->addSuccessor(B.TargetBB);
+ CurMBB->addSuccessor(NextMBB);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (NextMBB != NextBlock)
+ BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ CurMBB->addSuccessor(Return);
+ CurMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // TODO: If any two of the cases has the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+      // This is just a small case range :) containing exactly 1 case
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return !DisableJumpTables &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
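+/// ComputeRange - Return the number of case values in [First, Last],
+/// sign-extending by one bit first so the subtraction cannot overflow.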
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+ APInt LastExt(Last), FirstExt(First);
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+ return false;
+
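+  // Require roughly 40% density before emitting a jump table; sparser
+  // ranges are better handled by the binary search tree lowering.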
+ APInt Range = ComputeRange(First, Last);
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
+ if (Density < 0.4)
+ return false;
+
+ DEBUG(dbgs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range
+ << "Size: " << TSize << ". Density: " << Density << "\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+ CR.CaseBB->addSuccessor(Default);
+ CR.CaseBB->addSuccessor(JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.sle(TEI) && TEI.sle(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ JumpTableBB->addSuccessor(*I);
+ }
+ }
+
+ // Create a jump table index for this jump table, or return an existing
+ // one.
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+ ->getJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+ if (CR.CaseBB == CurMBB)
+ visitJumpTableHeader(JT, JTH);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the summed density of LHS and RHS.
+  // This will (heuristically) allow us to emit JumpTables later.
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
+ DEBUG(dbgs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ double LDensity = (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(dbgs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+  // Create a CaseBlock record representing a conditional branch to the LHS
+  // node (TrueBB) if the value being switched on (SV) is less than C.
+  // Otherwise, branch to the RHS node (FalseBB).
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and its span is less than the machine word bitwidth, encode the case range
+/// into a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default){
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+    // A single case counts as one comparison; a case range counts as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+ return false;
+ }
+ DEBUG(dbgs() << "Total number of unique destinations: "
+ << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(dbgs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the common case where all the case values fit in a machine
+  // word without subtracting minValue; the subtraction can then be omitted
+  // entirely.
+ if (minValue.isNonNegative() &&
+ maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
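+  // Illustrative example (editor's note, not in the original source): for the
+  // cases {0, 2, 4} all branching to one block with lowBound == 0, the loop
+  // above produces Mask = (1<<0)|(1<<2)|(1<<4) == 0x15 and Bits == 3 for that
+  // destination.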
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(dbgs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, (CR.CaseBB == CurMBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == CurMBB)
+ visitBitTestHeader(BTB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ size_t numCmps = 0;
+
+ // Start with "simple" cases
+ for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+ if (Cases.size() >= 2)
+    // We must recompute end() on each iteration because erase may
+    // invalidate it if we hold on to a stale copy.
+ for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
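+      // Illustrative example (editor's note): cases 1 -> BB and 2 -> BB
+      // become the single range [1, 2] -> BB, saving one comparison.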
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ CurMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ numCmps = 0;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+    // If the range has few cases (two or fewer), emit a series of specific
+    // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ continue;
+
+    // If the switch has more than 5 blocks, is at least 40% dense, and the
+    // target supports indirect branches, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+    // Emit a binary tree. We need to pick a pivot and push the left and right
+    // ranges onto the worklist. Leaves are handled via the
+    // handleSmallSwitchRange() call above.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+ // Update machine-CFG edges.
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+ CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<VectorType>(Ty)) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ const Type *ElTy = DestTy->getElementType();
+ unsigned VL = DestTy->getNumElements();
+ std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+ Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+ if (CV == CNZ) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ if (!isa<VectorType>(I.getType()) &&
+ Op2.getValueType() != TLI.getShiftAmountTy()) {
+ // If the operand is smaller than the shift count type, promote it.
+ EVT PTy = TLI.getPointerTy();
+ EVT STy = TLI.getShiftAmountTy();
+ if (STy.bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (STy.getSizeInBits() >=
+ Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // Otherwise we'll need to temporarily settle for some other
+ // convenient type; type legalization will make adjustments as
+ // needed.
+ else if (PTy.bitsLT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ else if (PTy.bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ }
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(i), Cond,
+ SDValue(TrueVal.getNode(),
+ TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(),
+ FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(User &I){
+  // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT SrcVT = N.getValueType();
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT SrcVT = N.getValueType();
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that the source and destination are the same size, so
+  // this is either a BIT_CONVERT or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ DestVT, N)); // convert types.
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if the mask is a sequential
+// mask starting from SIndx and increasing to the element length (undefs are
+// allowed).
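+// Illustrative example (editor's note): Mask = <4, 5, -1, 7> with SIndx == 4
+// is sequential, since the -1 (undef) element may match any index.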
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+ unsigned MaskNumElts = Mask.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+ return false;
+ return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(User &I) {
+ SmallVector<int, 8> Mask;
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ // Convert the ConstantVector mask operand into an array of ints, with -1
+ // representing undef values.
+ SmallVector<Constant*, 8> MaskElts;
+ cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ unsigned MaskNumElts = MaskElts.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (isa<UndefValue>(MaskElts[i]))
+ Mask.push_back(-1);
+ else
+ Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+ }
+
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors, and its length is a
+    // multiple of the source vector length. We can concatenate vectors to
+    // make the mask and vector lengths match.
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx);
+ else
+ MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ int Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+    // Check whether the access is smaller than the vector size and whether we
+    // can find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Cannot extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that is a multiple of the mask length.
+ if (MaxRange[Input] < (int)MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts < SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+ }
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ MappedOps.push_back(Idx);
+ else if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx - StartIdx[0]);
+ else
+ MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with extract-element and build-vector nodes.
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ } else {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < (int)SrcNumElts)
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src1, DAG.getConstant(Idx, PtrVT));
+ else
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src2,
+ DAG.getConstant(Idx - SrcNumElts, PtrVT));
+
+ Ops.push_back(Res);
+ }
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ const Type *AggTy = I.getType();
+ const Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ SDValue Val = getValue(Op1);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Type *AggTy = Op0->getType();
+ const Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getIntPtrConstant(Offset));
+ }
+
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
+ if (PtrBits < 64)
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
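+      // Illustrative example (editor's note): with ElementSize == 8,
+      // logBase2() == 3, so IdxN * 8 becomes IdxN << 3.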
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getPointerTy()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+ AllocSize,
+ DAG.getConstant(TySize, AllocSize.getValueType()));
+
+ EVT IntPtr = TLI.getPointerTy();
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If the size is greater than or equal to
+ // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding StackAlign-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
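+  // Illustrative example (editor's note, not in the original source): with
+  // StackAlign == 16 and a requested size of 20 bytes, the two nodes above
+  // compute (20 + 15) & ~15 == 32, rounding the allocation up to the next
+  // multiple of the stack alignment.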
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGBuilder::visitLoad(LoadInst &I) {
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ const Type *Ty = I.getType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile())
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(SV)) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = Ptr.getValueType();
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ A, SV, Offsets[i], isVolatile, Alignment);
+
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(StoreInst &I) {
+ Value *SrcV = I.getOperand(0);
+ Value *PtrV = I.getOperand(1);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+  // Get the lowered operands. Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, PtrV, Offsets[i], isVolatile, Alignment);
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+  // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic)
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+ for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+ assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+ "Intrinsic uses a non-legal type?");
+ }
+#endif // NDEBUG
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is target intrinsic that touches memory
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT, Info.ptrVal, Info.offset,
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ }
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit-pattern of the floating-point value.
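+/// Illustrative example (editor's note): for 6.0f, whose bits are 0x40C00000,
+/// this computes (0x40C00000 & 0x007fffff) | 0x3f800000 == 0x3FC00000, which
+/// is 1.5f -- the significand of 6.0 scaled into [1, 2).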
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit-pattern of the floating-point value.
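+/// Illustrative example (editor's note): for 6.0f (bits 0x40C00000),
+/// ((0x40C00000 & 0x7f800000) >> 23) - 127 == 129 - 127 == 2, matching
+/// 6.0 == 1.5 * 2^2.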
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl, unsigned Order) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Inlined utility function to implement binary input
+/// atomic intrinsics for visitIntrinsicCall. I is the call instruction and
+/// Op is the associated NodeType for I.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(Op, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
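+    // Illustrative overview (editor's note, not in the original source):
+    // exp(x) == 2^(x * log2(e)). The code above splits x * log2(e) into
+    // integer and fractional parts; the polynomials below approximate 2^frac
+    // over [0, 1), and shifting the integer part into bits 23..30 adds it
+    // directly into the exponent field of the IEEE-754 result.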
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::i32, t13);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
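+    // Illustrative example (editor's note): for x == 6.0f, Exp == 2 and
+    // X == 1.5, so log(6) =~ 2 * 0.6931 + log(1.5) =~ 1.3863 + 0.4055 ==
+    // 1.7918.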
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
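+    // Illustrative example (editor's note): for x == 6.0f, LogOfExponent == 2
+    // and X == 1.5, so log2(6) =~ 2 + log2(1.5) =~ 2 + 0.585 == 2.585.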
+
+ // Different possible minimax approximations of significand in
+ // floating-point for various degrees of accuracy over [1,2].
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog10(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
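+    // Illustrative example (editor's note): for x == 6.0f, Exp == 2 and
+    // X == 1.5, so log10(6) =~ 2 * 0.30103 + log10(1.5) =~ 0.60206 + 0.17609
+    // == 0.77815.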
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG10, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
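+
+ // Why the shift by 23 works (sketch, assuming IEEE-754 f32): the exponent
+ // field occupies bits 23-30, so integer-adding (IntegerPartOfX << 23) to
+ // the bit pattern of 2^FractionalPartOfX (a value in [1,2)) scales it by
+ // 2^IntegerPartOfX without a floating-point multiply. E.g. for x = 3.5:
+ // IntegerPartOfX = 3, the polynomial gives 2^0.5 ~= 1.414, and the
+ // integer add yields ~11.31 = 2^3.5.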
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base (first operand) is the constant 10.0f.
+void
+SelectionDAGBuilder::visitPow(CallInst &I) {
+ SDValue result;
+ Value *Val = I.getOperand(1);
+ DebugLoc dl = getCurDebugLoc();
+ bool IsExp10 = false;
+
+ if (getValue(Val).getValueType() == MVT::f32 &&
+ getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ APFloat Ten(10.0f);
+ IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+ }
+ }
+ }
+
+ if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(2));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
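+ //
+ // (Identity used, for reference: 10^x = 2^(x * log2(10)); the hex literal
+ // below, 0x40549a78, is the f32 bit pattern of log2(10) ~= 3.3219281.)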
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FPOW, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)));
+ }
+
+ setValue(&I, result);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree,
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // optimizing for size, we only do this if the expansion would produce a
+ // small number of multiplies; when not optimizing for size, we always use
+ // the full expansion.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, LHS.getValueType());
+
+ Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ // If optimizing for size, don't insert too many multiplies. This
+ // inserts up to 5 multiplies.
+ CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
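+
+// Worked example of the binary decomposition above (illustrative):
+// powi(x, 13), with Val = 13 = 0b1101, proceeds as
+//   bit 0 = 1: Res = x;              CurSquare = x^2
+//   bit 1 = 0:                       CurSquare = x^4
+//   bit 2 = 1: Res = x * x^4 = x^5;  CurSquare = x^8
+//   bit 3 = 1: Res = x^5 * x^8 = x^13
+// i.e. popcount(13) - 1 = 2 result multiplies plus 3 useful squarings,
+// which is what the CountPopulation_32(Val) + Log2_32(Val) < 7 heuristic
+// above bounds.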
+
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
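+ // The pointer arithmetic in the next two cases skips the leading '_' when
+ // the target does not use an underscore-prefixed name, selecting between
+ // e.g. "_setjmp" and "setjmp".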
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ case Intrinsic::memcpy: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+ // If the source and destination are known to not be aliases, we can
+ // lower memmove as memcpy.
+ uint64_t Size = -1ULL;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+ Size = C->getZExtValue();
+ if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ AliasAnalysis::NoAlias) {
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
+ // The real handling of this intrinsic is in FastISel.
+ if (OptLevel != CodeGenOpt::None)
+ // FIXME: Variable debug info is not supported here.
+ return 0;
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ if (!DW)
+ return 0;
+ DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ Value *Address = DI.getAddress();
+ if (!Address)
+ return 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
+ if (MDNode *Dbg = DI.getMetadata("dbg"))
+ MMI->setVariableDbgInfo(Variable, FI, Dbg);
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
+ // The real handling of this intrinsic is in FastISel.
+ if (OptLevel != CodeGenOpt::None)
+ // FIXME: Variable debug info is not supported here.
+ return 0;
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ if (!DW)
+ return 0;
+ DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ Value *V = DI.getValue();
+ if (!V)
+ return 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
+ if (MDNode *Dbg = DI.getMetadata("dbg"))
+ MMI->setVariableDbgInfo(Variable, FI, Dbg);
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (CurMBB->isLandingPad())
+ AddCatchInfo(I, MMI, CurMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getOperand(1));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ DAG.setRoot(Op.getValue(1));
+ setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (MMI) {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+ unsigned TypeID = MMI->getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ } else {
+ // Return something different to eh_selector.
+ Res = DAG.getConstant(1, MVT::i32);
+ }
+
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsUnwindInit(true);
+ }
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
+ EVT VT = getValue(I.getOperand(1)).getValueType();
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI->setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(I.getType());
+ Value *Op1 = I.getOperand(1);
+ Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ Code);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)), DAG));
+ return 0;
+ case Intrinsic::sin:
+ setValue(&I, DAG.getNode(ISD::FSIN, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cos:
+ setValue(&I, DAG.getNode(ISD::FCOS, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::log:
+ visitLog(I);
+ return 0;
+ case Intrinsic::log2:
+ visitLog2(I);
+ return 0;
+ case Intrinsic::log10:
+ visitLog10(I);
+ return 0;
+ case Intrinsic::exp:
+ visitExp(I);
+ return 0;
+ case Intrinsic::exp2:
+ visitExp2(I);
+ return 0;
+ case Intrinsic::pow:
+ visitPow(I);
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getOperand(1)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ PseudoSourceValue::getFixedStack(FI),
+ 0, true);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getOperand(0));
+ EVT Ty = Arg.getValueType();
+
+ if (CI->getZExtValue() == 0)
+ Res = DAG.getConstant(-1ULL, Ty);
+ else
+ Res = DAG.getConstant(0, Ty);
+
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ Ops[4] = DAG.getSrcValue(I.getOperand(1));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::TRAMPOLINE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+ Ops, 6);
+
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ Value *Alloca = I.getOperand(1);
+ Constant *TypeMap = cast<Constant>(I.getOperand(2));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+ case Intrinsic::trap:
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
+ return 0;
+ case Intrinsic::uadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::UADDO);
+ case Intrinsic::sadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::SADDO);
+ case Intrinsic::usub_with_overflow:
+ return implVisitAluOverflow(I, ISD::USUBO);
+ case Intrinsic::ssub_with_overflow:
+ return implVisitAluOverflow(I, ISD::SSUBO);
+ case Intrinsic::umul_with_overflow:
+ return implVisitAluOverflow(I, ISD::UMULO);
+ case Intrinsic::smul_with_overflow:
+ return implVisitAluOverflow(I, ISD::SMULO);
+
+ case Intrinsic::prefetch: {
+ SDValue Ops[4];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ return 0;
+ }
+
+ case Intrinsic::memory_barrier: {
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getOperand(x));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
+ case Intrinsic::atomic_cmp_swap: {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+ }
+ case Intrinsic::atomic_load_add:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+ case Intrinsic::atomic_load_sub:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+ case Intrinsic::atomic_load_or:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+ case Intrinsic::atomic_load_xor:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+ case Intrinsic::atomic_load_and:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+ case Intrinsic::atomic_load_nand:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+ case Intrinsic::atomic_load_max:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+ case Intrinsic::atomic_load_min:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+ case Intrinsic::atomic_load_umin:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+ case Intrinsic::atomic_load_umax:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+ case Intrinsic::atomic_swap:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
+ }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+static bool
+isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
+ const TargetLowering &TLI) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+ const Function *F = ExitBB->getParent();
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !I->isSafeToSpeculativelyExecute())
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !BBI->isSafeToSpeculativelyExecute())
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+ U = dyn_cast<Instruction>(U->getOperand(0))) {
+ if (!U)
+ return false;
+ if (!U->hasOneUse())
+ return false;
+ if (U == I)
+ break;
+ // Check for a truly no-op truncate.
+ if (isa<TruncInst>(U) &&
+ TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+ continue;
+ // Check for a truly no-op bitcast.
+ if (isa<BitCastInst>(U) &&
+ (U->getOperand(0)->getType() == U->getType() ||
+ (isa<PointerType>(U->getOperand(0)->getType()) &&
+ isa<PointerType>(U->getType()))))
+ continue;
+ // Otherwise it's not a true no-op.
+ return false;
+ }
+
+ return true;
+}
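+
+// For illustration, a call satisfying the target-independent checks above
+// (hypothetical IR):
+//   %r = tail call i32 @f(i32 %a)
+//   ret i32 %r          ; %r flows unmodified into the return
+// whereas a chained instruction between the call and the ret, or returning
+// a modified value (e.g. 'add i32 %r, 1'), fails the checks.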
+
+void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ unsigned BeginLabel = 0, EndLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<uint64_t, 4> Offsets;
+ getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI, &Offsets);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+ SDValue DemoteStackSlot;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
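+
+ // (What the demotion above amounts to, illustratively: a call whose
+ // aggregate result cannot be returned in registers, e.g.
+ //   %r = call {i64, i64, i64, i64} @g()
+ // is instead passed a hidden sret pointer to DemoteStackSlot as its first
+ // argument; the pieces are loaded back out of that slot further down.)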
+
+ for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ SDValue ArgNode = getValue(*i);
+ Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad && MMI) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI->NextLabelID();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI->getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ // Now that the call site is handled, stop tracking it.
+ MMI->setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getControlRoot(), BeginLabel));
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall &&
+ !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+ isTailCall = false;
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), RetTy,
+ CS.paramHasAttr(0, Attribute::SExt),
+ CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+ CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+ CS.getCallingConv(),
+ isTailCall,
+ !CS.getInstruction()->use_empty(),
+ Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder);
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode()) {
+ setValue(CS.getInstruction(), Result.first);
+ } else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = OutVTs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+ DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ Add, NULL, Offsets[i], false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ SmallVector<EVT, 4> RetTys;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs,
+ RegisterVT, VT, AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ CurReg += NumRegs;
+ }
+
+ setValue(CS.getInstruction(),
+ DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size()));
+
+ }
+
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
+ if (Result.second.getNode())
+ DAG.setRoot(Result.second);
+ else
+ HasTailCall = true;
+
+ if (LandingPad && MMI) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ EndLabel = MMI->NextLabelID();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that
+/// the value is equal or not equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
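+
+// E.g. (illustrative) both 'memcmp(a, b, 4) == 0' and 'memcmp(a, b, 4) != 0'
+// satisfy this predicate, since only the zero/non-zero result matters;
+// 'memcmp(a, b, 4) < 0' does not.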
+
+static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(LoadInput,
+ PointerType::getUnqual(LoadTy));
+
+ if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+ Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+ false /*volatile*/, 1 /* align=1 */);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, lower it and return true; otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumOperands() != 4)
+ return false;
+
+ Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+ if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
+ !isa<IntegerType>(I.getOperand(3)->getType()) ||
+ !isa<IntegerType>(I.getType()))
+ return false;
+
+ ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ const Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ if (!TLI.isTypeLegal(LoadVT) || !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
+ }
+
+
+ return false;
+}
+
+
+void SelectionDAGBuilder::visitCall(CallInst &I) {
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+ if (II) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ if (!F->hasLocalLinkage() && F->hasName()) {
+ StringRef Name = F->getName();
+ if (Name == "copysign" || Name == "copysignf") {
+ if (I.getNumOperands() == 3 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.getType() == I.getOperand(2)->getType()) {
+ SDValue LHS = getValue(I.getOperand(1));
+ SDValue RHS = getValue(I.getOperand(2));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "memcmp") {
+ if (visitMemCmpCall(I))
+ return;
+ }
+ }
+ } else if (isa<InlineAsm>(I.getOperand(0))) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getOperand(0));
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+ // Check if we can potentially perform a tail call. More detailed checking
+ // will be done within LowerCallTo, after more information about the call is
+ // known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+ unsigned Order, SDValue &Chain,
+ SDValue *Flag) const {
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+ RegisterVT.isInteger() && !RegisterVT.isVector()) {
+ unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+ if (FLI.LiveOutRegInfo.size() > SlotNo) {
+ FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
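+ // (Example, illustrative: a 32-bit vreg whose known-zero mask has 24
+ // leading ones, i.e. NumZeroBits >= 24, gets an AssertZext from i8,
+ // letting later zero-extensions of that register fold away.)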
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+ if (FromVT != MVT::Other)
+ P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+ }
+
+ Parts[i] = P;
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ unsigned Order, SDValue &Chain,
+ SDValue *Flag) const {
+ // Get the list of the values's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl, Order,
+ Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 and Flag is used, then the use of the last CopyToReg is
+ // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+ bool HasMatching,unsigned MatchingIdx,
+ SelectionDAG &DAG, unsigned Order,
+ std::vector<SDValue> &Ops) const {
+ assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+ unsigned Flag = Code | (Regs.size() << 3);
+ if (HasMatching)
+ Flag |= 0x80000000 | (MatchingIdx << 16);
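+ // (Layout of the flag word as built here: bits 0-2 hold the operand kind
+ // Code, bits 3-15 the register count (hence the Regs.size() assert above),
+ // and, when HasMatching, bit 31 plus bits 16-30 carry the matching operand
+ // index.)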
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo *TRI) {
+ EVT FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+ E = TRI->regclass_end(); RCI != E; ++RCI) {
+ EVT ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+ // NOTE: This isn't ideal. In particular, this might allocate the
+ // frame pointer in functions that need it (due to it not yet having been
+ // taken out of the allocation order, because a variable-sized allocation
+ // hasn't been seen). This is a slight code pessimization, but should still
+ // work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+ public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs,
+ const TargetRegisterInfo &TRI) const {
+ if (isOutReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+ }
+ if (isInReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+ }
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
+ const TargetData *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ const llvm::Type *OpTy = CallOperandVal->getType();
+
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ llvm_report_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+
+private:
+ /// MarkRegAndAliases - Mark the specified register and all aliases in the
+ /// specified set.
+ static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+ Regs.insert(Reg);
+ if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+ for (; *Aliases; ++Aliases)
+ Regs.insert(*Aliases);
+ }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// Input and OutputRegs are the set of already allocated physical registers.
+///
+void SelectionDAGBuilder::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = FuncInfo.Fn->getContext();
+
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If there is an input constraint that matches this, we need to reserve
+ // the input register so no other inputs allocate to it.
+ isInReg = OpInfo.hasMatchingInput();
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a
+    // bitcast of the input value. More generally, handle any case where the
+    // input value disagrees with the register class we plan to put it in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ EVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is an FP value and the register class wants integers,
+        // bitcast it to the corresponding integer type. This turns an f64
+        // value into an i64, which can be passed with two i32 values on a
+        // 32-bit machine.
+ RegVT = EVT::getIntegerVT(Context,
+ OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ EVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ return;
+ }
+
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class. Allocate NumRegs consecutive, available
+  // registers from the class.
+ std::vector<unsigned> RegClassRegs
+ = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ unsigned NumAllocated = 0;
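+  // Example: with NumRegs == 2 and candidates {R0, R1, R2, R3} where R1 is
+  // already in use, the run of free registers resets at R1 and the scan
+  // succeeds with {R2, R3}.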
+ for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+ unsigned Reg = RegClassRegs[i];
+ // See if this register is available.
+ if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
+ (isInReg && InputRegs.count(Reg))) { // Already used.
+ // Make sure we find consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Check to see if this register is allocatable (i.e. don't give out the
+ // stack pointer).
+ const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+ if (!RC) { // Couldn't allocate this register.
+ // Reset NumAllocated to make sure we return consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Okay, this register is good, we can use it.
+ ++NumAllocated;
+
+ // If we allocated enough consecutive registers, succeed.
+ if (NumAllocated == NumRegs) {
+ unsigned RegStart = (i-NumAllocated)+1;
+ unsigned RegEnd = i+1;
+ // Mark all of the allocated registers used.
+ for (unsigned i = RegStart; i != RegEnd; ++i)
+ Regs.push_back(RegClassRegs[i]);
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+ OpInfo.ConstraintVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+    // Indirect operands access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
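+
+  // Lowering proceeds in several passes over the constraints: canonicalize
+  // them and compute operand value types, choose a constraint alternative and
+  // grab any explicitly requested physregs, assign registers to the remaining
+  // register-class operands, and finally build the operand list for the
+  // ISD::INLINEASM node itself.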
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+  unsigned ArgNo = 0;   // ArgNo - The index of the next CallInst argument.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ EVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asms.
+ if (OpInfo.CallOperandVal) {
+ // Strip bitcasts, if any. This mostly comes up for functions.
+ OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+ }
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types differ (e.g. one is an integer and the
+    // other is floating point), or their sizes are different, flag it as an
+    // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ llvm_report_error("Unsupported asm: input constraint"
+ " with a matching output constraint of incompatible"
+ " type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert(OpInfo.Type == InlineAsm::isInput &&
+ "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value. If we don't
+      // have an indirect input, put it in the constant pool if we can;
+      // otherwise spill it to a stack slot.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot, NULL, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
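+    // (For instance, a direct "m" input holding a non-constant i32 value gets
+    // a 4-byte stack slot here, with a store of the value emitted before the
+    // asm and the slot's address passed as the operand.)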
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+
+ ConstraintInfos.clear();
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+
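+  // Note: each operand record appended to AsmNodeOperands below is a flag
+  // word followed by the operand's values. The low three bits of the flag
+  // give the kind (1 = REGUSE, 2 = REGDEF, 3 = IMM, 4 = MEM, 6 = EARLYCLOBBER
+  // REGDEF), the bits above give how many operands follow, and a use tied to
+  // a def additionally carries the matched operand number in the high bits
+  // (see isUseOperandTiedToDef in InlineAsm.h).
+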
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ llvm_report_error("Couldn't allocate output reg for"
+ " constraint '" + OpInfo.ConstraintCode + "'!");
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ 6 /* EARLYCLOBBER REGDEF */ :
+ 2 /* REGDEF */ ,
+ false,
+ 0,
+ DAG, SDNodeOrder,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+      // Scan until we find the definition of this operand that we already
+      // emitted. When we find it, create a RegsForValue operand.
+ unsigned CurOp = 2; // The first operand.
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+ (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+ (OpFlag & 7) == 4 /*MEM*/) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
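+      // (To reach matched operand N, the loop above hops over N complete
+      // records, advancing CurOp past each record's flag word plus however
+      // many operands that flag says follow.)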
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if ((OpFlag & 7) == 2 /*REGDEF*/
+ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ llvm_report_error("Don't know how to handle tied indirect "
+ "register inputs yet!");
+ }
+ RegsForValue MatchedRegs;
+ MatchedRegs.TLI = &TLI;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the newly created virtual registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ SDNodeOrder, Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+ true, OpInfo.getMatchedOperand(),
+ DAG, SDNodeOrder, AsmNodeOperands);
+ break;
+ } else {
+ assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+ assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect other inputs yet!");
+
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+ hasMemory, Ops, DAG);
+ if (Ops.empty()) {
+ llvm_report_error("Invalid operand for inline asm"
+ " constraint '" + OpInfo.ConstraintCode + "'!");
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty() ||
+ !OpInfo.AssignedRegs.areValueTypesLegal()) {
+ llvm_report_error("Couldn't allocate input reg for"
+ " constraint '"+ OpInfo.ConstraintCode +"'!");
+ }
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ SDNodeOrder, Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+ DAG, SDNodeOrder,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+ false, 0, DAG, SDNodeOrder,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands.
+ AsmNodeOperands[0] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Flag),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDNodeOrder, Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDNodeOrder, Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+  }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ StoresToEmit[i].second, 0);
+ OutChains.push_back(Val);
+ }
+
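+  // A TokenFactor takes several chains and produces one: it only asserts that
+  // all of its input operations complete before anything chained after it, so
+  // the independent stores can still be scheduled in any order.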
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGBuilder::visitVACopy(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ DAG.getSrcValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned NumFixedArgs,
+ CallingConv::ID CallConv, bool isTailCall,
+ bool isReturnValueUsed,
+ SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl,
+ unsigned Order) {
+ // Handle all of the outgoing arguments.
+ SmallVector<ISD::OutputArg, 32> Outs;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should come from the front end. The back
+        // end will guess if this info is not there, but there are cases it
+        // cannot get right.
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts,
+ PartVT, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If this isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ Outs.push_back(MyFlags);
+ }
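+      // (For instance, an i64 argument on a 32-bit target is split into two
+      // i32 parts above: the first part is tagged as a split and later parts
+      // carry an original alignment of 1.)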
+ }
+ }
+
+ // Handle the incoming return values from the call.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = isReturnValueUsed;
+ if (RetSExt)
+ MyFlags.Flags.setSExt();
+ if (RetZExt)
+ MyFlags.Flags.setZExt();
+ if (isInreg)
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!isTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((isTailCall || InVals.size() == Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+ DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(Ins[i].VT == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (isTailCall) {
+ DAG.setRoot(Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
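+  // (An AssertSext/AssertZext node records that the incoming parts are
+  // already sign- or zero-extended, letting later combines drop redundant
+  // extensions when the full-width value is rebuilt.)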
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg],
+ NumRegs, RegisterVT, VT,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+  // For a function returning void, there is no return value. We can't create
+  // such a node, so we just return a null return value in that case; nothing
+  // will actually look at it.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), Chain);
+
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+ return SDValue();
+}
+
+void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+ SDValue Op = getValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
+ // If this is the entry block, emit arguments.
+ Function &F = *LLVMBB->getParent();
+ SelectionDAG &DAG = SDB->DAG;
+ SDValue OldRoot = DAG.getRoot();
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI);
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
+ OutVTs, OutsFlags, DAG);
+ if (!FLI.CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+    // NOTE: We assume a pointer never breaks down into more than one VT or
+    // more than one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
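+  // When this happens the return value is demoted: the caller passes a hidden
+  // pointer, and at return time the value is stored through that pointer
+  // (tracked via FLI.DemoteRegister, set below) rather than returned in
+  // registers.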
+
+ // Set up the incoming argument description vector.
+ unsigned Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = TD->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should be passed from the front end. The
+        // back end will guess if this info is not there, but there are cases
+        // it cannot get right.
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (F.paramHasAttr(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If this isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(Ins[i].VT == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FLI.CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1,
+ RegVT, VT, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FLI.DemoteRegister = SRetReg;
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+ SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ for (unsigned Value = 0; Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i],
+ NumParts, PartVT, VT,
+ AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ if (!I->use_empty()) {
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+ SDB->setValue(I, Res);
+
+      // If this argument is live outside of the entry block, insert a copy
+      // from wherever we got it to the vreg that other BBs will reference it
+      // as.
+ SDB->CopyToExportRegsIfNeeded(I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBBs for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ unsigned Reg;
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = SDB->ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo->CreateRegForValue(C);
+ SDB->CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ Reg = FuncInfo->ValueMap[PHIOp];
+ if (Reg == 0) {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo->CreateRegForValue(PHIOp);
+ SDB->CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
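+      // (An i64 PHI on a 32-bit target, for instance, contributes two entries
+      // here, Reg and Reg+1, one for each machine PHI node piece.)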
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ SDB->ConstantsOut.clear();
+}
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+ FastISel *F) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegForValue to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
+ else {
+ SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ unsigned Reg = F->getRegForValue(PHIOp);
+ if (Reg == 0) {
+ SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ }
+ }
+
+ return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000..bc4b33d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,502 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+class SelectionDAGBuilder {
+ MachineBasicBlock *CurMBB;
+
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and then emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ Constant *LT;
+ Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator()(const Case &C1, const Case &C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+ Value *CmpLHS, *CmpMHS, *CmpRHS;
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+ };
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table
+    /// entry to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
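+  // A BitTestCase describes one test in a lowered bit-test switch: roughly,
+  // the code emitted in ThisBB branches to TargetBB when bit (SV - First) of
+  // Mask is set; otherwise control falls through to the next test.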
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+ Mask(M), ThisBB(T), TargetBB(Tr) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, Value* SV,
+ unsigned Rg, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ Value *SValue;
+ unsigned Reg;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+ AliasAnalysis *AA;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ /// PHINodesToUpdate - A list of phi instructions whose operand list will
+ /// be updated after processing the current basic block.
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+ /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
+ /// scheduler custom lowering), track the change here.
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<Constant*, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
+ FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : CurDebugLoc(DebugLoc::getUnknownLoc()), SDNodeOrder(0),
+ TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false),
+ Context(dag.getContext()) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+ void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  /// AssignOrderingToNode - Assign an ordering to the node. The ordering
+  /// reflects the order in which the code appeared in the source, and is used
+  /// by the scheduler to effectively turn off scheduling.
+ void AssignOrderingToNode(const SDNode *Node);
+
+ void visit(Instruction &I);
+
+ void visit(unsigned Opcode, User &I);
+
+ void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+ SDValue getValue(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs);
+
+ void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ unsigned Opc);
+ void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(Value *V);
+ void ExportFromCurrentBlock(Value *V);
+ void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+private:
+ // Terminator instructions.
+ void visitRet(ReturnInst &I);
+ void visitBr(BranchInst &I);
+ void visitSwitch(SwitchInst &I);
+ void visitIndirectBr(IndirectBrInst &I);
+ void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+public:
+ void visitSwitchCase(CaseBlock &CB);
+ void visitBitTestHeader(BitTestBlock &B);
+ void visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(InvokeInst &I);
+ void visitUnwind(UnwindInst &I);
+
+ void visitBinary(User &I, unsigned OpCode);
+ void visitShift(User &I, unsigned Opcode);
+ void visitAdd(User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(User &I);
+ void visitMul(User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+ void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(User &I);
+ void visitFCmp(User &I);
+ // Visit the conversion instructions
+ void visitTrunc(User &I);
+ void visitZExt(User &I);
+ void visitSExt(User &I);
+ void visitFPTrunc(User &I);
+ void visitFPExt(User &I);
+ void visitFPToUI(User &I);
+ void visitFPToSI(User &I);
+ void visitUIToFP(User &I);
+ void visitSIToFP(User &I);
+ void visitPtrToInt(User &I);
+ void visitIntToPtr(User &I);
+ void visitBitCast(User &I);
+
+ void visitExtractElement(User &I);
+ void visitInsertElement(User &I);
+ void visitShuffleVector(User &I);
+
+ void visitExtractValue(ExtractValueInst &I);
+ void visitInsertValue(InsertValueInst &I);
+
+ void visitGetElementPtr(User &I);
+ void visitSelect(User &I);
+
+ void visitAlloca(AllocaInst &I);
+ void visitLoad(LoadInst &I);
+ void visitStore(StoreInst &I);
+ void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+ void visitCall(CallInst &I);
+ bool visitMemCmpCall(CallInst &I);
+
+ void visitInlineAsm(CallSite CS);
+ const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+ void visitPow(CallInst &I);
+ void visitExp2(CallInst &I);
+ void visitExp(CallInst &I);
+ void visitLog(CallInst &I);
+ void visitLog2(CallInst &I);
+ void visitLog10(CallInst &I);
+
+ void visitVAStart(CallInst &I);
+ void visitVAArg(VAArgInst &I);
+ void visitVAEnd(CallInst &I);
+ void visitVACopy(CallInst &I);
+
+ void visitUserOp1(Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..da2e6e4
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,1462 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
+ cl::desc("Schedule copies of livein registers"),
+ cl::init(false));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
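+
+// A specific scheduler can also be requested by name on the command line,
+// e.g. "llc -pre-RA-sched=fast"; the individual scheduler implementations
+// register their names with this registry.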
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+
+ if (OptLevel == CodeGenOpt::None)
+ return createFastDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+ return createTDListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() ==
+ TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+ return createBURRListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
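+//
+// For illustration only (hypothetical target and helper names, not part of
+// this interface), a target override might look like:
+//
+//   MachineBasicBlock *FooTargetLowering::EmitInstrWithCustomInserter(
+//       MachineInstr *MI, MachineBasicBlock *MBB,
+//       DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+//     switch (MI->getOpcode()) {
+//     default: llvm_unreachable("Unexpected instr type to insert");
+//     case Foo::SELECT_CC:
+//       // Expand into a diamond of new blocks; record any split
+//       // <OldSucc, NewSucc> edges in EM.
+//       return EmitSelectCCDiamond(MI, MBB, EM); // hypothetical helper
+//     }
+//   }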
+MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+ return 0;
+}
+
+/// EmitLiveInCopy - Emit a copy for a live-in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &InsertPos,
+ unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterClass *RC,
+ DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ unsigned NumUses = 0;
+ MachineInstr *UseMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+ UE = MRI.use_end(); UI != UE; ++UI) {
+ UseMI = &*UI;
+ if (++NumUses > 1)
+ break;
+ }
+
+ // If the number of uses is not one, or the use is not a move instruction,
+ // don't coalesce. Also, coalesce away only a copy from one virtual
+ // register to another.
+ bool Coalesced = false;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (NumUses == 1 &&
+ TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ VirtReg = DstReg;
+ Coalesced = true;
+ }
+
+ // Now find an ideal location to insert the copy.
+ MachineBasicBlock::iterator Pos = InsertPos;
+ while (Pos != MBB->begin()) {
+ MachineInstr *PrevMI = prior(Pos);
+ DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+ // copyRegToReg might emit multiple instructions to do a copy.
+ unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+ if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+ // This is what the BB looks like right now:
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ //
+ // We want to insert "r1025 = mov r1". Inserting this copy below the
+ // move to r1024 makes it impossible for that move to be coalesced.
+ //
+ // r1025 = mov r1
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ // r2 = mov r1025
+ break; // Woot! Found a good location.
+ --Pos;
+ }
+
+ bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
+
+ CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+ if (Coalesced) {
+ if (&*InsertPos == UseMI) ++InsertPos;
+ MBB->erase(UseMI);
+ }
+}
+
+/// EmitLiveInCopies - If this is the first basic block in the function,
+/// and if it has live ins that need to be copied into vregs, emit the
+/// copies into the block.
+static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ if (SchedLiveInCopies) {
+ // Emit the copies at a heuristically-determined location in the block.
+ DenseMap<MachineInstr*, unsigned> CopyRegMap;
+ MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+ RC, CopyRegMap, MRI, TRI, TII);
+ }
+ } else {
+ // Emit the copies into the top of the block.
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+ LI->second, LI->first, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+ MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(TLI, *FuncInfo)),
+ SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0)
+{}
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+unsigned SelectionDAGISel::MakeReg(EVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ AU.addPreserved<DwarfWriter>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ Function &Fn = *mf.getFunction();
+
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ // Get alias analysis for load/store combining.
+ AA = &getAnalysis<AliasAnalysis>();
+
+ MF = &mf;
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ if (Fn.hasGC())
+ GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn);
+ else
+ GFI = 0;
+ RegInfo = &MF->getRegInfo();
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ CurDAG->init(*MF, MMI, DW);
+ FuncInfo->set(Fn, *MF, EnableFastISel);
+ SDB->init(GFI, *AA);
+
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+ // Mark landing pad.
+ FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+ SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
+
+ // Add function live-ins to entry block live-in set.
+ for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); I != E; ++I)
+ MF->begin()->addLiveIn(I->first);
+
+#ifndef NDEBUG
+ assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+#endif
+
+ FuncInfo->clear();
+
+ return true;
+}
+
+/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
+/// attached to this instruction.
+static void SetDebugLoc(unsigned MDDbgKind, Instruction *I,
+ SelectionDAGBuilder *SDB,
+ FastISel *FastIS, MachineFunction *MF) {
+ if (isa<DbgInfoIntrinsic>(I)) return;
+
+ if (MDNode *Dbg = I->getMetadata(MDDbgKind)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+
+ SDB->setCurDebugLoc(Loc);
+
+ if (FastIS)
+ FastIS->setCurDebugLoc(Loc);
+
+ // If the function doesn't have a default debug location yet, set
+ // it. This is kind of a hack.
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
+}
+
+/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
+static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) {
+ SDB->setCurDebugLoc(DebugLoc::getUnknownLoc());
+ if (FastIS)
+ FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc());
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
+ BasicBlock::iterator Begin,
+ BasicBlock::iterator End,
+ bool &HadTailCall) {
+ SDB->setCurrentBasicBlock(BB);
+ unsigned MDDbgKind = LLVMBB->getContext().getMDKindID("dbg");
+
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block.
+ for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ SetDebugLoc(MDDbgKind, I, SDB, 0, MF);
+
+ if (!isa<TerminatorInst>(I)) {
+ SDB->visit(*I);
+
+ // Set the current debug location back to "unknown" so that it doesn't
+ // spuriously apply to subsequent instructions.
+ ResetDebugLoc(SDB, 0);
+ }
+ }
+
+ if (!SDB->HasTailCall) {
+ // Ensure that all instructions which are used outside of their defining
+ // blocks are available as virtual registers. Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
+ SDB->CopyToExportRegsIfNeeded(I);
+
+ // Handle PHI nodes in successor blocks.
+ if (End == LLVMBB->end()) {
+ HandlePHINodesInSuccessorBlocks(LLVMBB);
+
+ // Lower the terminator after the copies are emitted.
+ SetDebugLoc(MDDbgKind, LLVMBB->getTerminator(), SDB, 0, MF);
+ SDB->visit(*LLVMBB->getTerminator());
+ ResetDebugLoc(SDB, 0);
+ }
+ }
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+}
+
+namespace {
+/// SDOPsWorkListRemover - This class is a DAGUpdateListener that removes any
+/// deleted nodes from the worklist.
+class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
+ SmallVector<SDNode*, 128> &Worklist;
+public:
+ SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+ Worklist.end());
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+/// TrivialTruncElim - Eliminate some trivial nops that can result from
+/// ShrinkDemandedOps: (trunc (ext n)) -> n.
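+/// For example, (i8 (trunc (i32 (zext x:i8)))) simplifies to just x.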
+static bool TrivialTruncElim(SDValue Op,
+ TargetLowering::TargetLoweringOpt &TLO) {
+ SDValue N0 = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) &&
+ N0.getOperand(0).getValueType() == VT) {
+ return TLO.CombineTo(Op, N0.getOperand(0));
+ }
+ return false;
+}
+
+/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
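+/// For example, if only an add's low 8 bits are demanded, (i32 add x, y)
+/// can become (i32 zext (i8 add (trunc x), (trunc y))) when the i8
+/// operations and the casts are free.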
+void SelectionDAGISel::ShrinkDemandedOps() {
+ SmallVector<SDNode*, 128> Worklist;
+
+ // Add all the dag nodes to the worklist.
+ Worklist.reserve(CurDAG->allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I)
+ Worklist.push_back(I);
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.pop_back_val();
+
+ if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
+ // Run ShrinkDemandedOp on scalar binary operations.
+ if (N->getNumValues() == 1 &&
+ N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+ KnownZero, KnownOne, TLO) ||
+ (N->getOpcode() == ISD::TRUNCATE &&
+ TrivialTruncElim(SDValue(N, 0), TLO))) {
+ // Revisit the node.
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+ Worklist.end());
+ Worklist.push_back(N);
+
+ // Replace the old value with the new one.
+ DEBUG(errs() << "\nReplacing ";
+ TLO.Old.getNode()->dump(CurDAG);
+ errs() << "\nWith: ";
+ TLO.New.getNode()->dump(CurDAG);
+ errs() << '\n');
+
+ Worklist.push_back(TLO.New.getNode());
+
+ SDOPsWorkListRemover DeadNodes(Worklist);
+ CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ if (TLO.Old.getNode()->use_empty()) {
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+ i != e; ++i) {
+ SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
+ if (OpNode->hasOneUse()) {
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ OpNode), Worklist.end());
+ Worklist.push_back(OpNode);
+ }
+ }
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ TLO.Old.getNode()), Worklist.end());
+ CurDAG->DeleteNode(TLO.Old.getNode());
+ }
+ }
+ }
+ }
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+ CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+
+ // Only install this information if it tells us something.
+ if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
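+ // LiveOutRegInfo is indexed by virtual register number, counted from
+ // the first virtual register, so rebase DestReg before indexing.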
+ DestReg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (DestReg >= FuncInfo->LiveOutRegInfo.size())
+ FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+ FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo->LiveOutRegInfo[DestReg];
+ LOI.NumSignBits = NumSignBits;
+ LOI.KnownOne = KnownOne;
+ LOI.KnownZero = KnownZero;
+ }
+ } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+ BlockName = MF->getFunction()->getNameStr() + ":" +
+ BB->getBasicBlock()->getNameStr();
+
+ DEBUG(dbgs() << "Initial selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 1", GroupName);
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
+ }
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Vector Legalization", GroupName);
+ Changed = CurDAG->LegalizeVectors();
+ } else {
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization 2", GroupName);
+ CurDAG->LegalizeTypes();
+ } else {
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Legalization", GroupName);
+ CurDAG->Legalize(OptLevel);
+ } else {
+ CurDAG->Legalize(OptLevel);
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 2", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ if (OptLevel != CodeGenOpt::None) {
+ ShrinkDemandedOps();
+ ComputeLiveOutVRegInfo();
+ }
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Selection", GroupName);
+ InstructionSelect();
+ } else {
+ InstructionSelect();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG:\n");
+ DEBUG(CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling", GroupName);
+ Scheduler->Run(CurDAG, BB, BB->end());
+ } else {
+ Scheduler->Run(CurDAG, BB, BB->end());
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Creation", GroupName);
+ BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+ } else {
+ BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+ }
+
+ // Free the scheduler state.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
+ delete Scheduler;
+ } else {
+ delete Scheduler;
+ }
+
+ DEBUG(dbgs() << "Selected machine code:\n");
+ DEBUG(BB->dump());
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
+ MachineFunction &MF,
+ MachineModuleInfo *MMI,
+ DwarfWriter *DW,
+ const TargetInstrInfo &TII) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(MF, MMI, DW,
+ FuncInfo->ValueMap,
+ FuncInfo->MBBMap,
+ FuncInfo->StaticAllocaMap
+#ifndef NDEBUG
+ , FuncInfo->CatchInfoLost
+#endif
+ );
+
+ unsigned MDDbgKind = Fn.getContext().getMDKindID("dbg");
+
+ // Iterate over all basic blocks in the function.
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ BasicBlock *LLVMBB = &*I;
+ BB = FuncInfo->MBBMap[LLVMBB];
+
+ BasicBlock::iterator const Begin = LLVMBB->begin();
+ BasicBlock::iterator const End = LLVMBB->end();
+ BasicBlock::iterator BI = Begin;
+
+ // Lower any arguments needed in this block if this is the entry block.
+ bool SuppressFastISel = false;
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ LowerArguments(LLVMBB);
+
+ // If any of the arguments has the byval attribute, forgo
+ // fast-isel in the entry block.
+ if (FastIS) {
+ unsigned j = 1;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I, ++j)
+ if (Fn.paramHasAttr(j, Attribute::ByVal)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort)
+ dbgs() << "FastISel skips entry block due to byval argument\n";
+ SuppressFastISel = true;
+ break;
+ }
+ }
+ }
+
+ if (MMI && BB->isLandingPad()) {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ unsigned LabelID = MMI->addLandingPad(BB);
+
+ const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL);
+ BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only one we try to work around for the moment.
+ BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+ }
+ }
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS && !SuppressFastISel) {
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDB->getControlRoot());
+ CodeGenAndEmitDAG();
+ SDB->clear();
+ }
+ FastIS->startNewBlock(BB);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != End; ++BI) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(BI))
+ if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+ ResetDebugLoc(SDB, FastIS);
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ BI->dump();
+ }
+ assert(!EnableFastISelAbort &&
+ "FastISel didn't handle a PHI in a successor");
+ break;
+ }
+
+ SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(BI)) {
+ ResetDebugLoc(SDB, FastIS);
+ continue;
+ }
+
+ // Clear out the debug location so that it doesn't carry over to
+ // unrelated instructions.
+ ResetDebugLoc(SDB, FastIS);
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ BI->dump();
+ }
+
+ if (!BI->getType()->isVoidTy()) {
+ unsigned &R = FuncInfo->ValueMap[BI];
+ if (!R)
+ R = FuncInfo->CreateRegForValue(BI);
+ }
+
+ bool HadTailCall = false;
+ SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ if (HadTailCall) {
+ BI = End;
+ break;
+ }
+
+ // If the instruction was codegen'd with multiple blocks,
+ // inform the FastISel object where to resume inserting.
+ FastIS->setCurrentBlock(BB);
+ continue;
+ }
+
+ // Otherwise, give up on FastISel for the rest of the block.
+ // For now, be a little lenient about non-branch terminators.
+ if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ BI->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+ }
+
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ if (BI != End) {
+ bool HadTailCall;
+ SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ }
+
+ delete FastIS;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DEBUG(dbgs() << "Target-post-processed machine code:\n");
+ DEBUG(BB->dump());
+
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << SDB->PHINodesToUpdate.size() << "\n");
+ DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << SDB->PHINodesToUpdate[i].first
+ << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know which MBB is the last one the LLVM BB expanded
+ // into, update PHI nodes in successors.
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ SDB->PHINodesToUpdate.clear();
+ return;
+ }
+
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDB->BitTestCases[i].Parent;
+ SDB->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+ CurDAG->setRoot(SDB->getRoot());
+ CodeGenAndEmitDAG();
+ SDB->clear();
+ }
+
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ SDB->setCurrentBasicBlock(BB);
+ // Emit the code
+ if (j+1 != ej)
+ SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j]);
+ else
+ SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j]);
+
+ CurDAG->setRoot(SDB->getRoot());
+ CodeGenAndEmitDAG();
+ SDB->clear();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is the "default" BB. We have two jumps to it: from the "header"
+ // BB and from the last "case" BB.
+ if (PHIBB == SDB->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+ back().ThisBB));
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB)) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(cBB));
+ }
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDB->JTCases[i].first.HeaderBB;
+ SDB->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+ CurDAG->setRoot(SDB->getRoot());
+ CodeGenAndEmitDAG();
+ SDB->clear();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDB->JTCases[i].second.MBB;
+ SDB->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ CodeGenAndEmitDAG();
+ SDB->clear();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand
+ (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
+ }
+ // JT BB. Just iterate over successors here
+ if (BB->isSuccessor(PHIBB)) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (BB->isSuccessor(PHI->getParent())) {
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+ SDB->setCurrentBasicBlock(BB);
+
+ // Emit the code
+ SDB->visitSwitchCase(SDB->SwitchCases[i]);
+ CurDAG->setRoot(SDB->getRoot());
+ CodeGenAndEmitDAG();
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
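+ // The loop below runs at most twice: once for the TrueBB edge and,
+ // when FalseBB is a different block, once more after FalseBB has been
+ // shifted into TrueBB at the bottom of the loop.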
+ while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ // If new BB's are created during scheduling, the edges may have been
+ // updated. That is, the edge from ThisBB to BB may have been split and
+ // BB's predecessor is now another block.
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
+ SDB->EdgeMapping.find(BB);
+ if (EI != SDB->EdgeMapping.end())
+ ThisBB = EI->second;
+
+ // BB may have been removed from the CFG if a branch was constant folded.
+ if (ThisBB->isSuccessor(BB)) {
+ for (MachineBasicBlock::iterator Phi = BB->begin();
+ Phi != BB->end() && Phi->isPHI();
+ ++Phi) {
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != SDB->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (SDB->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pn].second,
+ false));
+ Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ break;
+ }
+ }
+ }
+ }
+
+ // Don't process RHS if same block as LHS.
+ if (BB == SDB->SwitchCases[i].FalseBB)
+ SDB->SwitchCases[i].FalseBB = 0;
+
+ // If we haven't handled the RHS, do so now. Otherwise, we're done.
+ SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB;
+ SDB->SwitchCases[i].FalseBB = 0;
+ }
+ assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0);
+ SDB->clear();
+ }
+ SDB->SwitchCases.clear();
+
+ SDB->PHINodesToUpdate.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it; otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+ return new ScheduleHazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
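+/// For example, if the pattern wants (and X, 255) but the combiner shrank
+/// the constant to (and X, 15) because the other bits of X are known zero,
+/// this still matches: the needed bits (255 & ~15) are known zero in X.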
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask allows bits that the desired mask does not,
+ // this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
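+/// For example, if the pattern wants (or X, 255) but the combiner shrank
+/// the constant to (or X, 240) because the low four bits of X are already
+/// known to be one, this still matches.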
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual OR mask sets bits that the desired mask does not,
+ // this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[0]); // input chain.
+ Ops.push_back(InOps[1]); // input asm string.
+
+ unsigned i = 2, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Flag)
+ --e; // Don't process a flag operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
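+ // The low three bits of the flag word encode the operand kind (4 means
+ // a memory operand); the remaining bits hold the number of registers
+ // the operand occupies, as decoded by InlineAsm::getNumOperandRegisters.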
+ if ((Flags & 7) != 4 /*MEM*/) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
+ llvm_report_error("Could not match memory address. Inline asm"
+ " failure!");
+ }
+
+ // Add this to the output node.
+ Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
+ MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the flag input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return a use of the MVT::Flag value produced by the
+/// specified SDNode, or null if there is none.
+///
+static SDNode *findFlagUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root,
+ SmallPtrSet<SDNode*, 16> &Visited) {
+ if (Use->getNodeId() < Def->getNodeId() ||
+ !Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+ return true;
+ }
+ return false;
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically, since we only fold loads which
+/// have one non-chain use, we only need to watch out for load/op/store
+/// and load/op/cmp case where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+ SmallPtrSet<SDNode*, 16> Visited;
+ return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+/// U can be folded during the instruction selection that starts at Root, and
+/// folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ // If Root can somehow reach N through a path that doesn't contain
+ // U, then folding N would create a cycle. E.g., in the following
+ // diagram, Root can reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces a flag, then it gets (even more) interesting. Since it
+ // will be "glued" together with its flag use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [FU] //
+ //
+ // If FU (flag use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of FU and a successor of
+ // Fold. But since Fold and FU are flagged together, this will create
+ // a cycle in the scheduling graph.
+
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Flag) {
+ SDNode *FU = findFlagUse(Root);
+ if (FU == NULL)
+ break;
+ Root = FU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+ }
+
+ return !isNonImmUse(Root, N, U);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::Other);
+ VTs.push_back(MVT::Flag);
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ unsigned C = cast<LabelSDNode>(N)->getLabelID();
+ SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
+ return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL,
+ MVT::Other, Tmp, Chain);
+}
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot yet select: ";
+ N->printrFull(Msg, CurDAG);
+ llvm_report_error(Msg.str());
+}
+
+void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
+ dbgs() << "Cannot yet select: ";
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
+ MVT::Other))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ llvm_report_error("Cannot yet select: intrinsic %" +
+ Intrinsic::getName((Intrinsic::ID)iid));
+ else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
+ llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
+ tii->getName(iid));
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..3786bd1
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,301 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Flag)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
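+
+// Illustrative use (N is a hypothetical SDNode of interest): record the
+// attribute, then render the graph so the node shows up highlighted.
+//
+//   DAG.setGraphColor(N, "red");
+//   DAG.viewGraph();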
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+ DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for (SDNodeIterator i = SDNodeIterator::begin(N),
+ iend = SDNodeIterator::end(N); i != iend; ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1,
+ printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
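+
+// Sketch of the intended workflow (Root is a hypothetical SDNode): color
+// everything reachable from Root, then render. If the helper hit its depth
+// limit, the re-coloring above ("red" -> "blue", "yellow" -> "green") makes
+// the truncation visible in the rendered graph.
+//
+//   DAG.setSubgraphColor(Root, "red");
+//   DAG.viewGraph();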
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+ FlaggedNodes.pop_back();
+ if (!FlaggedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::
+getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..d74ec7e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2675 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace llvm {
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (reloc == Reloc::PIC_) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+}
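+
+// Usage sketch (GV is a hypothetical thread-local global): callers select
+// the TLS access sequence from the returned model, e.g.
+//
+//   TLSModel::Model Model = getTLSModel(GV, TM.getRelocationModel());
+//   // PIC + local linkage or hidden visibility => TLSModel::LocalDynamic
+//   // non-PIC + locally defined => TLSModel::LocalExec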
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+}
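+
+// For example, after this initialization an i64 shift-left that must be
+// lowered to a library call will use Names[RTLIB::SHL_I64], i.e. the
+// libgcc/compiler-rt routine "__ashldi3". This table only supplies default
+// names; targets may override individual entries.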
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
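+
+// Usage sketch: mapping an f32 -> f64 extension to its libcall.
+//
+//   RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
+//   // LC == RTLIB::FPEXT_F32_F64, default name "__extendsfdf2".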
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
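+
+// Usage sketch for the fp -> signed int mapping:
+//
+//   RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
+//   // LC == RTLIB::FPTOSINT_F64_I32, default name "__fixdfsi".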
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
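+
+// Usage sketch for the signed int -> fp mapping:
+//
+//   RTLIB::Libcall LC = RTLIB::getSINTTOFP(MVT::i64, MVT::f32);
+//   // LC == RTLIB::SINTTOFP_I64_F32, default name "__floatdisf".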
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
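+
+// These condition codes say how a comparison libcall's integer result is
+// tested against zero. For example, "__eqsf2" (RTLIB::OEQ_F32) returns zero
+// exactly when its operands compare equal, so the call is lowered as
+// (result SETEQ 0); "__unordsf2" (RTLIB::UO_F32) returns nonzero for an
+// unordered pair, hence (result SETNE 0).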
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(TargetMachine &tm,
+ TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(ConvertActions, 0, sizeof(ConvertActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ benefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = SchedulingForLatency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ IfCvtBlockSizeLimit = 2;
+ IfCvtDupBlockSizeLimit = 0;
+ PrefLoopAlignment = 0;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLowering::~TargetLowering() {
+ delete &TLOF;
+}
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering* TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ EVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (DestVT.bitsLT(NewVT)) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+}
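+
+// Worked example of the halving loop above: for VT = v8f32 on a target where
+// only v4f32 is legal, NumElts drops 8 -> 4 while NumVectorRegs doubles
+// 1 -> 2, giving NumIntermediates = 2 with IntermediateVT = v4f32. If no
+// vector type is legal at all, the loop bottoms out at a single scalar
+// element and the value is fully scalarized.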
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+ EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!ExpandedVT.isInteger())
+ break;
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction(ExpandedVT, Expand);
+ }
+
+ // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ EVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, Promote);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (!isTypeLegal(VT)) {
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ // Determine if there is a legal wider type.
+ bool IsLegalWiderType = false;
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts && NElts != 1) {
+ TransformToType[i] = SVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (!IsLegalWiderType) {
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ }
+ }
+ }
+ }
+}
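+
+// Concrete illustration of the integer expansion above: on a target whose
+// largest integer register is i64, i128 ends up with NumRegistersForVT = 2,
+// RegisterTypeForVT = i64, and TransformToType = i64, i.e. each expansion
+// step halves the type and doubles the register count.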
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+
+MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+ return PointerTy.SimpleTy;
+}
+
+MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT) const {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ EVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ if (DestVT.bitsLT(NewVT)) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+}
+
+/// getWidenVectorType - Given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent, based on the cost of
+/// scalarizing vs. using the wider vector type.
+EVT TargetLowering::getWidenVectorType(EVT VT) const {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+ // Default is not to widen until moved to LegalizeTypes
+ return MVT::Other;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const {
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+ // if we can expand it to have all bits set, do it
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
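+
+// Worked example: for Op = (x | 0xFF) with Demanded = 0x0F, the constant
+// intersects ~Demanded (its 0xF0 bits are never used), so it is shrunk to
+// Demanded & 0xFF = 0x0F and the node is rebuilt as (x | 0x0F).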
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
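+
+// Worked example: if Op is an i32 add but only the low 16 bits are demanded,
+// and i32 -> i16 truncation plus i16 -> i32 zero-extension are both free on
+// the target, the add is rebuilt as
+//   zext i32 (trunc i16 (x) + trunc i16 (y))
+// which subsequent folding can often simplify further.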
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, recording the original and replacement nodes in TLO.
+/// Otherwise, analyze the expression and return a mask of KnownOne and
+/// KnownZero bits for the expression (used to simplify the caller). The
+/// KnownZero/One bits may only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+ // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+ KnownZero = ~KnownOne & NewMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS; here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+ LHSZero, LHSOne, Depth+1);
+ // If the LHS already has zeros where RHSC does, this and is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // For XOR, we prefer to force bits to 1 if they will make a -1.
+ // If we can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
+ InSignBit.zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ EVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0), dl, EVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ APInt TruncMask = NewMask;
+ TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ unsigned InBitWidth = In.getValueSizeInBits();
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+ APInt HighBits = APInt::getHighBitsSet(InBitWidth,
+ InBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue());
+ HighBits.trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ In.getOperand(1)));
+ }
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
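+  // Sketch of the TRUNCATE/SRL shrink above: for (trunc i64 -> i32 (srl x, c)),
+  // when none of the demanded result bits can receive bits shifted in from the
+  // upper 32 bits, the node is rebuilt as (srl (trunc x), c) so the shift
+  // executes in the narrower type.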
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BIT_CONVERT:
+#if 0
+ // If this is an FP->Int bitcast and if the sign bit is the only thing that
+ // is demanded, turn this into a FGETSIGN.
+ if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
+ MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+ !MVT::isVector(Op.getOperand(0).getValueType())) {
+ // Only do this xform if FGETSIGN is valid or if before legalize.
+ if (!TLO.AfterLegalize ||
+ isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ Op.getOperand(0));
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+#endif
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
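+
+// A concrete sketch of the ComputeMaskedBits fallback: for an i8 value
+// (and (or x, 8), 8), the OR makes bit 3 known one and the AND makes the other
+// seven bits known zero, so KnownZero has BitWidth - 1 == 7 bits set and
+// KnownOne has exactly 1; the value is always exactly 8, and the function
+// returns true.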
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ if (isa<ConstantSDNode>(N0.getNode())) {
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
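+    // Worked example for i32 (a sketch): ctlz produces a value in [0, 32],
+    // and bit 5 of that count is set only for the value 32, which occurs
+    // exactly when x == 0. Comparing (srl (ctlz x), 5) against 0 or 1 is
+    // therefore just an x != 0 / x == 0 test, as rewritten above.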
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+        // We can narrow (e.g.) 16-bit extending loads on a 32-bit target
+        // to 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
+          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+          for (unsigned offset = 0; offset < origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getSrcValue(),
+ Lod->getSrcValueOffset() + bestOffset,
+ false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
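+    // Sketch of the narrowing above, assuming a little-endian target: for
+    // (and (load i32 p), 0xFF00) == 0, the width-8 pass finds the mask
+    // confined to the second byte, so the wide load is replaced by an i8
+    // load at p+1 masked with 0xFF and compared against 0.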
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT)==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+          break; // TODO: be more careful with signed comparisons.
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the extended part has any inconsistent bits, it cannot ever
+ // compare equal. In other words, they have to be all ones or all
+ // zeros.
+ APInt ExtBits =
+ APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+ if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return N0;
+
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
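+    // For unsigned i8 operands (a sketch): setult X, 1 becomes seteq X, 0,
+    // and setugt X, 254 becomes seteq X, 255 -- each boundary comparison
+    // admits exactly one value, so an equality test suffices.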
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy();
+        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
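+    // Sketch for i32: (X & 8) != 0 is rewritten above as
+    // truncate((X & 8) >> 3) -- the tested bit is moved down to bit 0, so
+    // the shifted value is already the 0/1 result and no setcc node remains.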
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETULE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETUGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOLT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger())
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
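+  // Sanity check of one row above (i1 truth table): X <u Y holds only for
+  // X == 0, Y == 1, which is exactly ~X & Y; the remaining comparisons reduce
+  // to xor/and/or of the (possibly inverted) operands in the same way.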
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ // FIXME: lots more standard ones to handle.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+    // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I)))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: llvm_unreachable("Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then choosing the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this, the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ bool hasMemory, const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+    // If this constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ bool hasMemory,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+  // The default implementation supports a conservative RISC-style r+r and
+  // r+i addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+  // Only support r+r.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
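+
+// Under this default (a sketch): "r1 + r2" and "r1 + 42" are legal; scale-1
+// "r1 + r2 + 4" is rejected, as is scale-2 "2*r1 + 8"; a bare "2*r1" is
+// accepted because it can be treated as r+r.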
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+    return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
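+
+// Worked example of the sequence above (a sketch; the exact constants come
+// from APInt::magic, shown here with the usual Hacker's Delight values): for
+// x / 7 on i32, m = 0x92492493 and s = 2. Since d > 0 and m < 0 the numerator
+// is added back:
+//   Q = MULHS(x, 0x92492493)
+//   Q = Q + x
+//   Q = SRA(Q, 2)
+//   return Q + SRL(Q, 31)   // add the sign bit to round toward zero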
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+ APInt::mu magics = N1C->getAPIntValue().magicu();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+    return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
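+
+// Worked example (a sketch, again assuming the standard magic constants): for
+// unsigned x / 7 on i32, magicu yields m = 0x24924925 with a = 1 and s = 3,
+// taking the add-indicator path:
+//   Q   = MULHU(x, 0x24924925)
+//   NPQ = SRL(x - Q, 1) + Q
+//   return SRL(NPQ, 2)   // magics.s - 1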
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..0e6d479
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,449 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// runtime/GC/SemiSpace.cpp contains a prototype runtime which is compatible
+// with ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+    /// Head - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ const StructType *StackEntryTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+  /// 'yield return' enumerators; this transform allows it to be non-allocating.
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
+ CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ Args.append(CI->op_begin() + 1, CI->op_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+ NewBB, CleanupBB,
+ Args.begin(), Args.end(),
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
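+
+// In outline (a sketch): the first calls to Next() yield an IRBuilder at each
+// existing 'ret' or 'unwind'; once those are exhausted, every remaining
+// non-intrinsic call is rewritten into an invoke unwinding to a synthetic
+// cleanup block that ends in 'unwind', and the final yield points at that
+// block, so the caller can emit "finally"-style code on every escape path.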
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*,16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(F.getContext(), BaseElts, 2, false),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+ Metadata.begin(), NumMeta)
+ };
+
+ Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2,
+ false);
+
+ std::string TypeName("gc_map.");
+ TypeName += utostr(NumMeta);
+ F.getParent()->addTypeName(TypeName, FrameMap->getType());
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+ Type *Ty = StructType::get(F.getContext(), EltTys);
+
+ std::string TypeName("gc_stackentry.");
+ TypeName += F.getName();
+ F.getParent()->addTypeName(TypeName, Ty);
+
+ return Ty;
+}
+
+/// initializeCustomLowering - If this module uses the GC intrinsics, find
+/// them now. If not, exit fast.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<const Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ StructType *FrameMapTy = StructType::get(M.getContext(), EltTys);
+ M.addTypeName("gc_map", FrameMapTy);
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+ OpaqueType *RecursiveTy = OpaqueType::get(M.getContext());
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+ EltTys.push_back(FrameMapPtrTy);
+ PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys);
+
+ RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
+ StackEntryTy = cast<StructType>(LinkTyH.get());
+ const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+ M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
+ // a FunctionPass?
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
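+
+// The runtime-visible result (a sketch in C, mirroring the structs above):
+//
+//   struct FrameMap   { int32_t NumRoots, NumMeta; void *Meta[]; };
+//   struct StackEntry { struct StackEntry *Next;
+//                       const struct FrameMap *Map;
+//                       void *Roots[]; };
+//   struct StackEntry *llvm_gc_root_chain;
+//
+// A collector walks llvm_gc_root_chain and, for each entry, visits
+// Map->NumRoots slots of Roots[].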
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts()));
+ if (IsNullValue(CI->getOperand(2)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
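+
+// Editorial sketch (illustrative, not part of the original commit): the
+// IR built above models a per-frame record equivalent to the C++ structs
+// below. The field names are expository assumptions; the pass manipulates
+// the corresponding LLVM types directly.
+//
+//   struct FrameMap {
+//     int32_t NumRoots;    // Total roots in this frame.
+//     int32_t NumMeta;     // Roots with metadata; a prefix of Roots.
+//     const void *Meta[];  // Metadata pointers, one per meta root.
+//   };
+//
+//   struct StackEntry {
+//     StackEntry *Next;     // gc_frame.next: the caller's entry.
+//     const FrameMap *Map;  // gc_frame.map: this function's FrameMap.
+//     void *Roots[];        // One slot per gcroot, replacing the allocas.
+//   };
+//
+//   // llvm_gc_root_chain points at the innermost StackEntry. The entry
+//   // code stores the current head into Next and the new entry into
+//   // llvm_gc_root_chain; every exit pops by restoring Next as the head.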
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..aeaa38b
--- /dev/null
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getNameStr();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
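+
+// Editorial sketch (illustrative, not part of the original commit): the
+// two equations above on an assumed diamond CFG E -> {B1, B2} -> R, where
+// B1 uses CSRs {r0, r1} and B2 uses only {r1}, modeled with plain bitsets
+// in place of CSRegSet:
+//
+//   #include <bitset>
+//   typedef std::bitset<2> Regs;              // bit 0 = r0, bit 1 = r1
+//   Regs anticInAtEntry() {
+//     const Regs Used[4] = { Regs("00"), Regs("11"),   // E,  B1
+//                            Regs("10"), Regs("00") }; // B2, R
+//     Regs AnticIn[4], AnticOut[4];
+//     // One backward pass reaches the fixed point on this acyclic CFG:
+//     AnticIn[3] = Used[3];                    // R has no successors
+//     AnticOut[1] = AnticOut[2] = AnticIn[3];  // {}
+//     AnticIn[1] = Used[1] | AnticOut[1];      // {r0, r1}
+//     AnticIn[2] = Used[2] | AnticOut[2];      // {r1}
+//     AnticOut[0] = AnticIn[1] & AnticIn[2];   // {r1}
+//     AnticIn[0] = Used[0] | AnticOut[0];      // {r1}
+//     return AnticIn[0];                       // r1 anticipated at entry,
+//   }                                          // r0 only on the B1 path
+//
+// calcAvailInOut below is the mirror image, computed forward over
+// predecessors instead of backward over successors.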
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ dbgs()
+ << "-----------------------------------------------------------\n";
+ }
+ });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ dbgs() << "ENABLED: " << Fn.getFunction()->getName();
+ if (HasFastExitPath)
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry region.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry region.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
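+
+// Editorial sketch (illustrative, not part of the original commit):
+// continuing the assumed diamond CFG E -> {B1, B2} -> R from the note
+// after calcAnticInOut, with B1 using {r0, r1} and B2 using {r1}:
+//
+//   AnticIn:  E={r1}  B1={r0,r1}  B2={r1}  R={}
+//   AvailIn:  E={}    B1={}       B2={}    R={r1}
+//   AvailOut: E={}    B1={r0,r1}  B2={r1}  R={r1}
+//
+//   CSRSave[E]  = ({r1} - {}) & UsedCSRegs                   = {r1}
+//   CSRSave[B1] = ({r0,r1} - {}) & (UsedCSRegs - AnticIn[E]) = {r0}
+//   CSRSave[B2] = ({r1} - {}) & (UsedCSRegs - AnticIn[E])    = {}
+//   CSRRestore[B1] = (AvailOut[B1] - AnticOut[B1])
+//                    & (UsedCSRegs - AvailOut[R]) - CSRSave[E] = {r0}
+//   CSRRestore[R]  = CSRSave[E] = {r1}  (entry spills pair with returns)
+//
+// Net effect: r1 is saved/restored on every path, while r0 is spilled and
+// restored only around B1, sparing the B2 path its cost.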
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
+ << Fn.getFunction()->getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ dbgs() << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ dbgs() << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// that have at least one path from the entry block directly to a
+/// return block, or a path with a very small number of edges.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getNameStr();
+
+ std::ostringstream name;
+ name << "_MBB_" << MBB->getNumber();
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ dbgs() << '\n';
+ }
+
+ if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
+ dbgs() << " "
+ << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ });
+}
+#endif
diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h
new file mode 100644
index 0000000..f69feaf
--- /dev/null
+++ b/lib/CodeGen/SimpleHazardRecognizer.h
@@ -0,0 +1,89 @@
+//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SimpleHazardRecognizer class, which
+// implements hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+ /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
+ /// a coarse classification and attempts to avoid grouping instructions
+ /// of a given class too densely together.
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
+ /// Class - A simple classification for SUnits.
+ enum Class {
+ Other, Load, Store
+ };
+
+ /// Window - The Class values of the most recently issued
+ /// instructions.
+ Class Window[8];
+
+ /// getClass - Classify the given SUnit.
+ Class getClass(const SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad())
+ return Load;
+ if (TID.mayStore())
+ return Store;
+ return Other;
+ }
+
+ /// Step - Rotate the existing entries in Window and insert the
+ /// given class value in position as the most recent.
+ void Step(Class C) {
+ std::copy(Window+1, array_endof(Window), Window);
+ Window[array_lengthof(Window)-1] = C;
+ }
+
+ public:
+ SimpleHazardRecognizer() : Window() {
+ Reset();
+ }
+
+ virtual HazardType getHazardType(SUnit *SU) {
+ Class C = getClass(SU);
+ if (C == Other)
+ return NoHazard;
+ unsigned Score = 0;
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ if (Window[i] == C)
+ Score += i + 1;
+ if (Score > array_lengthof(Window) * 2)
+ return Hazard;
+ return NoHazard;
+ }
+
+ virtual void Reset() {
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ Window[i] = Other;
+ }
+
+ virtual void EmitInstruction(SUnit *SU) {
+ Step(getClass(SU));
+ }
+
+ virtual void AdvanceCycle() {
+ Step(Other);
+ }
+ };
+}
+
+#endif
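+
+// Editorial note (illustrative, not part of the original commit): with the
+// 8-entry window above, a candidate instruction of class C scores the sum
+// of (i + 1) over the window slots holding C, so the newest slot weighs 8,
+// and a score above 16 reports a hazard. For example, after three
+// consecutive loads the window tail holds them at weights 6 + 7 + 8 = 21,
+// so a fourth load is reported as a Hazard; each AdvanceCycle() shifts an
+// Other entry into the window until the loads age out and the score drops
+// back to 16 or below.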
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
new file mode 100644
index 0000000..1d9bda4
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -0,0 +1,2792 @@
+//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register coalescing pass that attempts to
+// aggressively coalesce every register copy that it can.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regcoalescing"
+#include "SimpleRegisterCoalescing.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commutes performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(numDeadValNo, "Number of valno def marked dead");
+
+char SimpleRegisterCoalescing::ID = 0;
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PhysJoinTweak("tweak-phys-join-heuristics",
+ cl::desc("Tweak heuristics for joining phys reg with vr"),
+ cl::init(false), cl::Hidden);
+
+static RegisterPass<SimpleRegisterCoalescing>
+X("simple-register-coalescing", "Simple Register Coalescing");
+
+// Declare that we implement the RegisterCoalescer interface
+static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+
+const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->getCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->hasRedefByEC())
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
+ if (!SrcReg) return false; // Not defined by a copy.
+
+
+ // If the source register comes from an interval other than IntB, we can't
+ // handle this.
+ if (SrcReg != IntB.reg) return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ assert(ValLR != IntB.end() && "Live range not found!");
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst ||
+ ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
+ *tri_->getSubRegisters(IntB.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+ DEBUG({
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+
+ DEBUG({
+ dbgs() << "\nExtending: ";
+ IntB.print(dbgs(), tri_);
+ });
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->setCopy(0);
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0, true,
+ li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ IntB.addKills(ValLR->valno, BValNo->kills);
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ }
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ ValLR->valno->removeKill(FillerStart);
+ }
+
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (CopyMI->killsRegister(IntA.reg))
+ TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
+
+ ++numExtends;
+ return true;
+}
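+
+// Editorial sketch (illustrative, not part of the original commit), using
+// assumed small SlotIndexes:
+//
+//    4  B0 = ...          // ValLR = [4, 8): B dies at the copy into A
+//    8  A3 = B0
+//   12  ...               // B is not live here before the transform
+//   16  B1 = A3           // CopyMI: BLR starts at 16
+//
+// AdjustCopiesBackFrom inserts the filler range [8, 16) into IntB and
+// merges B1's value number into B0's, so B is live from 4 onward and the
+// copy at 16 becomes an identity copy that can later be removed.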
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ }
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back edge copy and commuting the def lengthen the
+ // live interval of the source register to the entire loop.
+ if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+ return false;
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->getCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator ALR =
+ IntA.FindLiveRangeContaining(CopyIdx.getUseIndex());
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
+ AValNo->isUnused() || AValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (!TID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // If some of the uses of IntA.reg is already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ mri_->use_nodbg_begin(IntA.reg),
+ UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ bool BHasPHIKill = BValNo->hasPHIKill();
+ SmallVector<VNInfo*, 4> BDeadValNos;
+ VNInfo::KillSet BKills;
+ std::map<SlotIndex, SlotIndex> BExtend;
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+ //
+ // then do not add kills of A to the newly created B interval.
+ bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+ if (Extended)
+ BExtend[ALR->end] = BLR->end;
+
+ // Rewrite the uses of IntA's specific Val# (AValNo) to use IntB.
+ bool BHasSubRegs = false;
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (UseMO.isKill()) {
+ if (Extended)
+ UseMO.setIsKill(false);
+ else
+ BKills.push_back(UseIdx.getDefIndex());
+ }
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+ if (DstReg == IntB.reg) {
+ // This copy will become a noop. If it's defining a new val#,
+ // remove that val# as well. However this live range is being
+ // extended to the end of the existing live range defined by the copy.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+ BHasPHIKill |= DLR->valno->hasPHIKill();
+ assert(DLR->valno->def == DefIdx);
+ BDeadValNos.push_back(DLR->valno);
+ BExtend[DLR->start] = DLR->end;
+ JoinedCopies.insert(UseMI);
+ // If this is a kill but it's going to be removed, the last use
+ // of the same val# is the new kill.
+ if (UseMO.isKill())
+ BKills.pop_back();
+ }
+ }
+
+ // We need to insert a new live range: [ALR.start, LastUse). It may be that
+ // we can simply extend BLR if CopyMI doesn't end the range.
+ DEBUG({
+ dbgs() << "\nExtending: ";
+ IntB.print(dbgs(), tri_);
+ });
+
+ // Remove val#'s defined by copies that will be coalesced away.
+ for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+ VNInfo *DeadVNI = BDeadValNos[i];
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
+ SRLI.removeValNo(SRLR->valno);
+ }
+ }
+ IntB.removeValNo(BDeadValNos[i]);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated. Kills are also updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->setCopy(0);
+ for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
+ if (ValNo->kills[j] != BLR->end)
+ BKills.push_back(ValNo->kills[j]);
+ }
+ ValNo->kills.clear();
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ SlotIndex End = AI->end;
+ std::map<SlotIndex, SlotIndex>::iterator
+ EI = BExtend.find(End);
+ if (EI != BExtend.end())
+ End = EI->second;
+ IntB.addRange(LiveRange(AI->start, End, ValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.MergeInClobberRange(*li_, AI->start, End,
+ li_->getVNInfoAllocator());
+ }
+ }
+ }
+ IntB.addKills(ValNo, BKills);
+ ValNo->setHasPHIKill(BHasPHIKill);
+
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << '\n';
+ dbgs() << "\nShortening: ";
+ IntA.print(dbgs(), tri_);
+ });
+
+ IntA.removeValNo(AValNo);
+
+ DEBUG({
+ dbgs() << " result = ";
+ IntA.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+
+ ++numCommutes;
+ return true;
+}
+
+/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+/// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ const TargetInstrInfo *tii_) {
+ if (MBB == SuccMBB)
+ return true;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
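+ // AnalyzeBranch returning false means the terminators were analyzable, and
+ // null TBB/FBB means the block ends with no branch at all, so control can
+ // only fall through to its layout successor.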
+ return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+ MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
+static void removeRange(LiveInterval &li,
+ SlotIndex Start, SlotIndex End,
+ LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+ li.removeRange(Start, End, true);
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ SlotIndex RemoveStart = Start;
+ SlotIndex RemoveEnd = Start;
+
+ while (RemoveEnd != End) {
+ LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart);
+ if (LR == sli.end())
+ break;
+ RemoveEnd = (LR->end < End) ? LR->end : End;
+ sli.removeRange(RemoveStart, RemoveEnd, true);
+ RemoveStart = RemoveEnd;
+ }
+ }
+ }
+}
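+
+// Editorial sketch (not part of this patch): the chunk-by-chunk walk of the
+// loop above, with liveness modeled as a plain std::set of live integer
+// points so that FindLiveRangeContaining becomes a membership test. All
+// names are hypothetical stand-ins for the LiveInterval API.
+#include <set> // needed only if the sketch is extracted stand-alone
+static void toyRemoveSubRange(std::set<unsigned> &Live,
+ unsigned Start, unsigned End) {
+ unsigned RemoveStart = Start, RemoveEnd = Start;
+ while (RemoveEnd != End) {
+ // FindLiveRangeContaining(RemoveStart): is anything live here?
+ if (!Live.count(RemoveStart))
+ break;
+ // Walk to the end of this live chunk, clamped to End.
+ RemoveEnd = RemoveStart;
+ while (RemoveEnd != End && Live.count(RemoveEnd))
+ ++RemoveEnd;
+ // sli.removeRange(RemoveStart, RemoveEnd): erase the chunk.
+ for (unsigned P = RemoveStart; P != RemoveEnd; ++P)
+ Live.erase(P);
+ RemoveStart = RemoveEnd;
+ }
+}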
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li,
+ const LiveRange *LR) {
+ SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB);
+ SlotIndex LastUseIdx;
+ MachineOperand *LastUse =
+ lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx);
+ if (LastUse) {
+ MachineInstr *LastUseMI = LastUse->getParent();
+ if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+ // r1024 = op
+ // ...
+ // BB1:
+ // = r1024
+ //
+ // BB2:
+ // r1025<dead> = r1024<kill>
+ if (MBBStart < LR->end)
+ removeRange(li, MBBStart, LR->end, li_, tri_);
+ return true;
+ }
+
+ // There are uses before the copy, just shorten the live range to the end
+ // of the last use.
+ LastUse->setIsKill();
+ removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_);
+ LR->valno->addKill(LastUseIdx.getDefIndex());
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ DstReg == li.reg) {
+ // Last use is itself an identity copy.
+ int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ LastUseMI->getOperand(DeadIdx).setIsDead();
+ }
+ return true;
+ }
+
+ // Is it live-in?
+ if (LR->start <= MBBStart && LR->end > MBBStart) {
+ if (LR->start == li_->getZeroIndex()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ mf_->begin()->removeLiveIn(li.reg);
+ }
+ // FIXME: Shorten intervals in BBs that reach this BB.
+ }
+
+ return false;
+}
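+
+// Editorial note on the routine above: it either (a) cuts the interval back
+// to the start of the copy's block when the last use lives in an unrelated
+// block, (b) shortens it to just past the last use in the same block and
+// marks that use as a kill, or (c) returns false, leaving the caller to
+// treat the def as possibly dead.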
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ unsigned DstReg,
+ unsigned DstSubIdx,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
+ ValNo->isUnused() || ValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (!TID.isAsCheapAsAMove())
+ return false;
+ if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, SawStore, AA))
+ return false;
+ if (TID.getNumDefs() != 1)
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (mri_->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ // If the destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
+ if (DstSubIdx) {
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (TID.getNumDefs() != 1)
+ return false;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC =
+ DstRC->getSubRegisterRegClass(DstSubIdx);
+ const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_);
+ if (DefRC == DstRC)
+ DstSubIdx = 0;
+ else if (DefRC != DstSubRC)
+ return false;
+ }
+
+ SlotIndex DefIdx = CopyIdx.getDefIndex();
+ const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
+ DLR->valno->setCopy(0);
+ // Don't forget to update sub-register intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
+ }
+ }
+
+ // If copy kills the source register, find the last use and propagate
+ // kill.
+ bool checkForDeadDef = false;
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ if (CopyMI->killsRegister(SrcInt.reg))
+ if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
+ checkForDeadDef = true;
+ }
+
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
+ MachineInstr *NewMI = prior(MII);
+
+ if (checkForDeadDef) {
+ // PR4090 fix: Trim interval failed because there was no use of the
+ // source interval in this MBB. If the def is in this MBB too then we
+ // should mark it dead:
+ if (DefMI->getParent() == MBB) {
+ DefMI->addRegisterDead(SrcInt.reg, tri_);
+ SrcLR->end = SrcLR->start.getNextSlot();
+ }
+ }
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction, and update implicit def interval val#s.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef() && li_->hasInterval(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
+ }
+ }
+
+ TransferImplicitOps(CopyMI, NewMI);
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ ++NumReMats;
+ return true;
+}
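+
+// Editorial sketch (not part of this patch) of the idea above: when the
+// copied value comes from a trivially rematerializable, as-cheap-as-a-move
+// definition (e.g. a constant load), re-emit that definition at the copy
+// point instead of copying. Toy three-address "IR"; all names are
+// hypothetical, and the toy assumes Copy.Src has exactly one definition
+// (SSA-like), unlike the real reaching-def reasoning above.
+#include <string> // needed only if the sketch is extracted stand-alone
+#include <vector>
+struct ToyInst { std::string Op, Dst, Src; }; // e.g. {"li", "r1", "42"}
+static void toyRematCopy(std::vector<ToyInst> &Code, unsigned CopyIdx) {
+ ToyInst &Copy = Code[CopyIdx]; // {"mv", Dst, Src}
+ for (unsigned i = 0; i != CopyIdx; ++i) {
+ const ToyInst &Def = Code[i];
+ if (Def.Dst == Copy.Src && Def.Op == "li") {
+ // Turn "Dst = mv Src" into "Dst = li imm"; the source register's
+ // interval can then shrink or die entirely.
+ Copy.Op = Def.Op;
+ Copy.Src = Def.Src;
+ return;
+ }
+ }
+}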
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (DstIsPhys && SubIdx) {
+ // Figure out the real physical register we are updating with.
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+ E = mri_->reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *UseMI = &*I;
+ ++I;
+ unsigned OldSubIdx = O.getSubReg();
+ if (DstIsPhys) {
+ unsigned UseDstReg = DstReg;
+ if (OldSubIdx)
+ UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
+
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
+ // If the use is a copy and it won't be coalesced away, and its source
+ // is defined by a trivial computation, try to rematerialize it instead.
+ if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
+ CopyDstSubIdx, UseMI))
+ continue;
+ }
+
+ O.setReg(UseDstReg);
+ O.setSubReg(0);
+ continue;
+ }
+
+ // Sub-register indexes go from small to large. e.g.
+ // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+ // EAX: 1 -> AL, 2 -> AX
+ // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
+ // sub-register 2 is also AX.
+ if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+ assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+ else if (SubIdx)
+ O.setSubReg(SubIdx);
+ // Remove would-be duplicated kill marker.
+ if (O.isKill() && UseMI->killsRegister(DstReg))
+ O.setIsKill(false);
+ O.setReg(DstReg);
+
+ // After updating the operand, check if the machine instruction has
+ // become a copy. If so, update its val# information.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+ tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
+ allocatableRegs_[CopyDstReg])) {
+ LiveInterval &LI = li_->getInterval(CopyDstReg);
+ SlotIndex DefIdx =
+ li_->getInstructionIndex(UseMI).getDefIndex();
+ if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) {
+ if (DLR->valno->def == DefIdx)
+ DLR->valno->setCopy(UseMI);
+ }
+ }
+ }
+}
+
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+ LiveInterval &LI) {
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ MachineInstr *UseMI = UseMO.getParent();
+ SlotIndex UseIdx =
+ li_->getInstructionIndex(UseMI).getUseIndex();
+ const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+ if (!LR ||
+ (!LR->valno->isKill(UseIdx.getDefIndex()) &&
+ LR->valno->def != UseIdx.getDefIndex())) {
+ // Interesting problem. After coalescing reg1027's def and kill are both
+ // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814)
+ //
+ // bb5:
+ // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
+ // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0
+ // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def>
+ // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
+ // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill>
+ //
+ // Do not remove the kill marker on t2LDRi12.
+ UseMO.setIsKill(false);
+ }
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it and also remove the empty intervals of its
+/// sub-registers. Return true if live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+/// Return true if live interval is removed.
+bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::iterator MLR =
+ li.FindLiveRangeContaining(CopyIdx.getDefIndex());
+ if (MLR == li.end())
+ return false; // Already removed by ShortenDeadCopySrcLiveRange.
+ SlotIndex RemoveStart = MLR->start;
+ SlotIndex RemoveEnd = MLR->end;
+ SlotIndex DefIdx = CopyIdx.getDefIndex();
+ // Remove the live range that's defined by this copy.
+ if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) {
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+ SlotIndex &LRStart, LiveIntervals *li_,
+ const TargetRegisterInfo* tri_) {
+ MachineInstr *DefMI =
+ li_->getInstructionFromIndex(LRStart.getDefIndex());
+ if (DefMI && DefMI != CopyMI) {
+ int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false);
+ if (DeadIdx != -1)
+ DefMI->getOperand(DeadIdx).setIsDead();
+ else
+ DefMI->addOperand(MachineOperand::CreateReg(li.reg,
+ /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true));
+ LRStart = LRStart.getNextSlot();
+ }
+}
+
+/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+/// extended by a dead copy. Mark the last use (if any) of the val# as a kill,
+/// since it ends the live range there. If there isn't another use, then this
+/// live range is dead. Return true if live interval is removed.
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+ if (CopyIdx == SlotIndex()) {
+ // FIXME: special case: function live-in. It can be a general case if the
+ // first instruction index starts at a value > 0.
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ if (mf_->begin()->isLiveIn(li.reg))
+ mf_->begin()->removeLiveIn(li.reg);
+ const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
+ removeRange(li, LR->start, LR->end, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+
+ LiveInterval::iterator LR =
+ li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex());
+ if (LR == li.end())
+ // Live-in but defined by a phi.
+ return false;
+
+ SlotIndex RemoveStart = LR->start;
+ SlotIndex RemoveEnd = CopyIdx.getStoreIndex();
+ if (LR->end > RemoveEnd)
+ // More uses past this copy? Nothing to do.
+ return false;
+
+ // If there is a last use in the same bb, we can't remove the live range.
+ // Shorten the live interval and return.
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+ return false;
+
+ // There are other kills of the val#. Nothing to do.
+ if (!li.isOnlyLROfValNo(LR))
+ return false;
+
+ MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+ if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+ // If the live range starts in another mbb and the copy mbb is not a fall
+ // through mbb, then we can only cut the range from the beginning of the
+ // copy mbb.
+ RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex();
+
+ if (LR->valno->def == RemoveStart) {
+ // If the def MI defines the val# and this copy is the only kill of the
+ // val#, then propagate the dead marker.
+ PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+ ++numDeadValNo;
+
+ if (LR->valno->isKill(RemoveEnd))
+ LR->valno->removeKill(RemoveEnd);
+ }
+
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+/// from an implicit def to another register can be coalesced away.
+bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li,
+ LiveInterval &ImpLi) const {
+ if (!CopyMI->killsRegister(ImpLi.reg))
+ return false;
+ // Make sure this is the only use.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (CopyMI == UseMI || JoinedCopies.count(UseMI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+/// virtual destination register with a physical source register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but it has low use density,
+ // do not join them; instead, mark the physical register as its allocation
+ // preference.
+ const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(DstInt);
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg),
+ mri_->use_nodbg_end()) / Length) <
+ (1.0 / Threshold)))
+ return false;
+
+ // If the virtual register live interval extends into a loop, turn down
+ // aggressiveness.
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+ const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
+ if (!L) {
+ // Let's see if the virtual register live interval extends into the loop.
+ LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
+ assert(DLR != DstInt.end() && "Live range not found!");
+ DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot());
+ if (DLR != DstInt.end()) {
+ CopyMBB = li_->getMBBFromIndex(DLR->start);
+ L = loopInfo->getLoopFor(CopyMBB);
+ }
+ }
+
+ if (!L || Length <= Threshold)
+ return true;
+
+ SlotIndex UseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ if (loopInfo->getLoopFor(SMBB) != L) {
+ if (!loopInfo->isLoopHeader(CopyMBB))
+ return false;
+ // If vr's live interval extends past the loop header, do not join.
+ for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
+ SE = CopyMBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB == CopyMBB)
+ continue;
+ if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
+ li_->getMBBEndIdx(SuccMBB)))
+ return false;
+ }
+ }
+ return true;
+}
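+
+// Editorial sketch (not part of this patch) isolating the use-density test
+// used above and in the routines that follow; names are hypothetical. With a
+// register class of 16 allocatable registers, Threshold = 32; an interval
+// spanning ~100 instructions with only 2 uses has density 0.02 < 1/32, so
+// the join is rejected and the physreg is only recorded as a hint.
+static bool toyDenseEnoughToJoin(unsigned NumUses, unsigned Length,
+ unsigned Threshold) {
+ if (Length <= Threshold)
+ return true; // short interval: joining is always acceptable
+ return (float)NumUses / Length >= 1.0f / Threshold;
+}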
+
+/// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+/// copy from a virtual source register to a physical destination register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but it has low use density,
+ // do not join them; instead, mark the physical register as its allocation
+ // preference.
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(SrcInt);
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg),
+ mri_->use_nodbg_end()) / Length) <
+ (1.0 / Threshold)))
+ return false;
+
+ if (SrcInt.empty())
+ // Must be implicit_def.
+ return false;
+
+ // If the virtual register live interval is defined in or crosses a loop,
+ // turn down aggressiveness.
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex UseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ assert(SLR != SrcInt.end() && "Live range not found!");
+ SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot());
+ if (SLR == SrcInt.end())
+ return true;
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ const MachineLoop *L = loopInfo->getLoopFor(SMBB);
+
+ if (!L || Length <= Threshold)
+ return true;
+
+ if (loopInfo->getLoopFor(CopyMBB) != L) {
+ if (SMBB != L->getLoopLatch())
+ return false;
+ // If vr's live interval is extended from before the loop latch, do not
+ // join.
+ for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
+ PE = SMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB == SMBB)
+ continue;
+ if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
+ li_->getMBBEndIdx(PredMBB)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
+ unsigned SmallReg,
+ unsigned Threshold) {
+ // Make sure the intervals are *short*.
+ LiveInterval &LargeInt = li_->getInterval(LargeReg);
+ LiveInterval &SmallInt = li_->getInterval(SmallReg);
+ unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
+ unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
+ if (SmallSize > Threshold || LargeSize > Threshold)
+ if ((float)std::distance(mri_->use_nodbg_begin(SmallReg),
+ mri_->use_nodbg_end()) / SmallSize <
+ (float)std::distance(mri_->use_nodbg_begin(LargeReg),
+ mri_->use_nodbg_end()) / LargeSize)
+ return false;
+ return true;
+}
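+
+// Editorial sketch (not part of this patch): the comparison above, isolated.
+// When either interval exceeds the threshold, the cross-class join is
+// rejected if the register constrained to the smaller class is used less
+// densely than the larger one, e.g. SmallReg with 3 uses over 30
+// instructions (0.1) vs. LargeReg with 8 uses over 20 (0.4): do not join.
+// Names are hypothetical.
+static bool toyIsWinToJoinCrossClass(unsigned SmallUses, unsigned SmallSize,
+ unsigned LargeUses, unsigned LargeSize,
+ unsigned Threshold) {
+ if (SmallSize <= Threshold && LargeSize <= Threshold)
+ return true; // both intervals short: always worth it
+ return (float)SmallUses / SmallSize >= (float)LargeUses / LargeSize;
+}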
+
+/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+/// register with a physical register, check whether any of the virtual
+/// register's operands is a sub-register use or def. If so, make sure it
+/// won't result in an illegal extract_subreg or insert_subreg instruction. e.g.
+/// vr1024 = extract_subreg vr1025, 1
+/// ...
+/// vr1024 = mov8rr AH
+/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
+/// AH does not have a super-reg whose sub-register 1 is AH.
+bool
+SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg,
+ unsigned PhysReg) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = &*I;
+ if (MI == CopyMI || JoinedCopies.count(MI))
+ continue;
+ unsigned SubIdx = O.getSubReg();
+ if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (MI->isExtractSubreg()) {
+ SubIdx = MI->getOperand(2).getImm();
+ if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (O.isDef()) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ ? tri_->getPhysicalRegisterRegClass(SrcReg)
+ : mri_->getRegClass(SrcReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ if (MI->isInsertSubreg() || MI->isSubregToReg()) {
+ SubIdx = MI->getOperand(3).getImm();
+ if (VirtReg == MI->getOperand(0).getReg()) {
+ if (!tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ } else {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(DstReg)
+ ? tri_->getPhysicalRegisterRegClass(DstReg)
+ : mri_->getRegClass(DstReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+/// an extract_subreg where dst is a physical register, e.g.
+/// cl = EXTRACT_SUBREG reg1024, 1
+bool
+SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealDstReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
+ RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
+ assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+ // For this type of EXTRACT_SUBREG, conservatively
+ // check if the live interval of the source register interferes with the
+ // actual super physical register we are trying to coalesce with.
+ LiveInterval &RHS = li_->getInterval(SrcReg);
+ if (li_->hasInterval(RealDstReg) &&
+ RHS.overlaps(li_->getInterval(RealDstReg))) {
+ DEBUG({
+ dbgs() << "Interfere with register ";
+ li_->getInterval(RealDstReg).print(dbgs(), tri_);
+ });
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DEBUG({
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
+ });
+ return false; // Not coalescable
+ }
+ return true;
+}
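+
+// Editorial note: in the example above, "cl = EXTRACT_SUBREG reg1024, 1"
+// forces reg1024 to live in ECX, so the checks reject the join whenever
+// reg1024's interval overlaps an existing interval for ECX or for any of
+// ECX's sub-registers.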
+
+/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+/// an insert_subreg where src is a physical register, e.g.
+/// reg1024 = INSERT_SUBREG reg1024, c1, 0
+bool
+SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealSrcReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
+ RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
+ assert(RealSrcReg && "Invalid insert_subreg instruction!");
+
+ LiveInterval &RHS = li_->getInterval(DstReg);
+ if (li_->hasInterval(RealSrcReg) &&
+ RHS.overlaps(li_->getInterval(RealSrcReg))) {
+ DEBUG({
+ dbgs() << "Interfere with register ";
+ li_->getInterval(RealSrcReg).print(dbgs(), tri_);
+ });
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DEBUG({
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
+ });
+ return false; // Not coalescable
+ }
+ return true;
+}
+
+/// getRegAllocPreference - Return register allocation preference register.
+///
+static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+ MachineInstr *CopyMI = TheCopy.MI;
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
+ bool isExtSubReg = CopyMI->isExtractSubreg();
+ bool isInsSubReg = CopyMI->isInsertSubreg();
+ bool isSubRegToReg = CopyMI->isSubregToReg();
+ unsigned SubIdx = 0;
+ if (isExtSubReg) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ SrcReg = CopyMI->getOperand(1).getReg();
+ SrcSubIdx = CopyMI->getOperand(2).getImm();
+ } else if (isInsSubReg || isSubRegToReg) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(3).getImm();
+ SrcReg = CopyMI->getOperand(2).getReg();
+ SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+ if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
+ // r1025 = INSERT_SUBREG r1025, r1024<2>, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already "
+ "coalesced to another register.\n");
+ return false; // Not coalescable.
+ }
+ } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) {
+ // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill>
+ Again = true;
+ return false; // Not coalescable.
+ }
+ } else {
+ llvm_unreachable("Unrecognized copy instruction!");
+ }
+
+ // If they are already joined, there is nothing to do.
+ if (SrcReg == DstReg) {
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
+ return false; // Not coalescable.
+ }
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+
+ // If they are both physical registers, we cannot join them.
+ if (SrcIsPhys && DstIsPhys) {
+ DEBUG(dbgs() << "\tCan not coalesce physregs.\n");
+ return false; // Not coalescable.
+ }
+
+ // We only join virtual registers with allocatable physical registers.
+ if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
+ DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+ if (DstIsPhys && !allocatableRegs_[DstReg]) {
+ DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+
+ // Check that a physical source register is compatible with dst regclass
+ if (SrcIsPhys) {
+ unsigned SrcSubReg = SrcSubIdx ?
+ tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC = DstRC;
+ if (DstSubIdx)
+ DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
+ assert(DstSubRC && "Illegal subregister index");
+ if (!DstSubRC->contains(SrcSubReg)) {
+ DEBUG(dbgs() << "\tIncompatible destination regclass: "
+ << tri_->getName(SrcSubReg) << " not in "
+ << DstSubRC->getName() << ".\n");
+ return false; // Not coalescable.
+ }
+ }
+
+ // Check that a physical dst register is compatible with source regclass
+ if (DstIsPhys) {
+ unsigned DstSubReg = DstSubIdx ?
+ tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
+ const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *SrcSubRC = SrcRC;
+ if (SrcSubIdx)
+ SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
+ assert(SrcSubRC && "Illegal subregister index");
+ if (!SrcSubRC->contains(DstSubReg)) {
+ DEBUG(dbgs() << "\tIncompatible source regclass: "
+ << tri_->getName(DstSubReg) << " not in "
+ << SrcSubRC->getName() << ".\n");
+ (void)DstSubReg;
+ return false; // Not coalescable.
+ }
+ }
+
+ bool CrossRC = false;
+ const TargetRegisterClass *SrcRC = SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *DstRC = DstIsPhys ? 0 : mri_->getRegClass(DstReg);
+ // NewRC should be non-null only when coalescing to a sub-register class.
+ const TargetRegisterClass *NewRC = NULL;
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ unsigned RealDstReg = 0;
+ unsigned RealSrcReg = 0;
+ if (isExtSubReg || isInsSubReg || isSubRegToReg) {
+ SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
+ if (SrcIsPhys && isExtSubReg) {
+ // r1024 = EXTRACT_SUBREG EAX, 0, then r1024 is really going to be
+ // coalesced with AX.
+ unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ if (DstSubIdx) {
+ // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (DstSubIdx != SubIdx) {
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
+ return false; // Not coalescable.
+ }
+ } else
+ SrcReg = tri_->getSubReg(SrcReg, SubIdx);
+ SubIdx = 0;
+ } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
+ // EAX = INSERT_SUBREG EAX, r1024, 0
+ unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+ if (SrcSubIdx) {
+ // EAX = INSERT_SUBREG EAX, r1024<2>, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (SrcSubIdx != SubIdx) {
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
+ return false; // Not coalescable.
+ }
+ } else
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ } else if ((DstIsPhys && isExtSubReg) ||
+ (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+ if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
+ DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg"
+ << " of a super-class.\n");
+ return false; // Not coalescable.
+ }
+
+ if (isExtSubReg) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
+ return false; // Not coalescable
+ } else {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ }
+ SubIdx = 0;
+ } else {
+ unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
+ : CopyMI->getOperand(2).getSubReg();
+ if (OldSubIdx) {
+ if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
+ // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ // Also check if the other larger register is of the same register
+ // class as the would be resulting register.
+ SubIdx = 0;
+ else {
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
+ return false; // Not coalescable.
+ }
+ }
+ if (SubIdx) {
+ if (!DstIsPhys && !SrcIsPhys) {
+ if (isInsSubReg || isSubRegToReg) {
+ NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
+ } else { // extract_subreg
+ NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
+ }
+ }
+ if (!NewRC) {
+ DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
+ return false; // Not coalescable
+ }
+
+ unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
+ unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
+ unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
+ if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ } else if (differingRegisterClasses(SrcReg, DstReg)) {
+ if (DisableCrossClassJoin)
+ return false;
+ CrossRC = true;
+
+ // FIXME: What if the result of an EXTRACT_SUBREG is then coalesced
+ // with another? If it's the resulting destination register, then
+ // the subidx must be propagated to uses (but only those defined
+ // by the EXTRACT_SUBREG). If it's being coalesced into another
+ // register, it should be safe because the register is assumed to have
+ // the register class of the super-register.
+
+ // Process moves where one of the registers have a sub-register index.
+ MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
+ MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
+ SubIdx = DstMO->getSubReg();
+ if (SubIdx) {
+ if (SrcMO->getSubReg())
+ // FIXME: can we handle this?
+ return false;
+ // This is not an insert_subreg but it looks like one.
+ // e.g. %reg1024:4 = MOV32rr %EAX
+ isInsSubReg = true;
+ if (SrcIsPhys) {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ } else {
+ SubIdx = SrcMO->getSubReg();
+ if (SubIdx) {
+ // This is not an extract_subreg but it looks like one.
+ // e.g. %cl = MOV16rr %reg1024:1
+ isExtSubReg = true;
+ if (DstIsPhys) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ }
+ }
+
+ unsigned LargeReg = SrcReg;
+ unsigned SmallReg = DstReg;
+
+ // Now determine the register class of the joined register.
+ if (isExtSubReg) {
+ if (SubIdx && DstRC && DstRC->isASubClass()) {
+ // This is a move to a sub-register class. However, the source is a
+ // sub-register of a larger register class. We don't know what the
+ // register class should be. FIXME.
+ Again = true;
+ return false;
+ }
+ if (!DstIsPhys && !SrcIsPhys)
+ NewRC = SrcRC;
+ } else if (!SrcIsPhys && !DstIsPhys) {
+ NewRC = getCommonSubClass(SrcRC, DstRC);
+ if (!NewRC) {
+ DEBUG(dbgs() << "\tDisjoint regclasses: "
+ << SrcRC->getName() << ", "
+ << DstRC->getName() << ".\n");
+ return false; // Not coalescable.
+ }
+ if (DstRC->getSize() > SrcRC->getSize())
+ std::swap(LargeReg, SmallReg);
+ }
+
+ // If we are joining two virtual registers and the resulting register
+ // class is more restrictive (fewer registers, smaller size), check if it's
+ // worth doing the merge.
+ if (!SrcIsPhys && !DstIsPhys &&
+ (isExtSubReg || DstRC->isASubClass()) &&
+ !isWinToJoinCrossClass(LargeReg, SmallReg,
+ allocatableRCRegs_[NewRC].count())) {
+ DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n");
+ // Allow the coalescer to try again in case either side gets coalesced to
+ // a physical register that's compatible with the other side. e.g.
+ // r1024 = MOV32to32_ r1025
+ // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // Will it create illegal extract_subreg / insert_subreg?
+ if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
+ return false;
+ if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
+ return false;
+
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
+ "Register mapping is horribly broken!");
+
+ DEBUG({
+ dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_);
+ dbgs() << " and "; DstInt.print(dbgs(), tri_);
+ dbgs() << ": ";
+ });
+
+ // Save a copy of the virtual register live interval. We'll manually
+ // merge this into the "real" physical register live interval this is
+ // coalesced with.
+ LiveInterval *SavedLI = 0;
+ if (RealDstReg)
+ SavedLI = li_->dupInterval(&SrcInt);
+ else if (RealSrcReg)
+ SavedLI = li_->dupInterval(&DstInt);
+
+ // Check if it is necessary to propagate "isDead" property.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+ MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
+ bool isDead = mopd->isDead();
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ if (!isDead && (SrcIsPhys || DstIsPhys)) {
+ // If the copy is in a loop, take care not to coalesce aggressively if the
+ // src is coming in from outside the loop (or the dst is out of the loop).
+ // If it's not in a loop, then determine whether to join them based purely
+ // on the length of the interval.
+ if (PhysJoinTweak) {
+ if (SrcIsPhys) {
+ if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ } else {
+ if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ } else {
+ // If the virtual register live interval is long but it has low use
+ // density, do not join them, instead mark the physical register as its
+ // allocation preference.
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+ unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+ const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ float Ratio = 1.0 / Threshold;
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
+ mri_->use_nodbg_end()) / Length) < Ratio)) {
+ mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ bool Swapped = false;
+ // If SrcInt is implicitly defined, it's safe to coalesce.
+ bool isEmpty = SrcInt.empty();
+ if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+ // Only coalesce an empty interval (defined by implicit_def) with
+ // another interval which has a valno defined by the CopyMI and the CopyMI
+ // is a kill of the implicit def.
+ DEBUG(dbgs() << "Not profitable!\n");
+ return false;
+ }
+
+ if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
+ RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
+ JoinedCopies.insert(CopyMI);
+ return true;
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "Interference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ LiveInterval *ResSrcInt = &SrcInt;
+ LiveInterval *ResDstInt = &DstInt;
+ if (Swapped) {
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "LiveInterval::join didn't work right!");
+
+ // If we're about to merge live ranges into a physical register live interval,
+ // we have to update any aliased register's live ranges to indicate that they
+ // have clobbered values for this range.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ // If this is an extract_subreg where dst is a physical register, e.g.
+ // cl = EXTRACT_SUBREG reg1024, 1
+ // then create and update the actual physical register allocated to RHS.
+ if (RealDstReg || RealSrcReg) {
+ LiveInterval &RealInt =
+ li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
+ for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
+ E = SavedLI->vni_end(); I != E; ++I) {
+ const VNInfo *ValNo = *I;
+ VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(),
+ false, // updated at *
+ li_->getVNInfoAllocator());
+ NewValNo->setFlags(ValNo->getFlags()); // * updated here.
+ RealInt.addKills(NewValNo, ValNo->kills);
+ RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
+ }
+ RealInt.weight += SavedLI->weight;
+ DstReg = RealDstReg ? RealDstReg : RealSrcReg;
+ }
+
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt,
+ li_->getVNInfoAllocator());
+ }
+
+ // If this is an EXTRACT_SUBREG, make sure the result of coalescing is the
+ // larger super-register.
+ if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ !SrcIsPhys && !DstIsPhys) {
+ if ((isExtSubReg && !Swapped) ||
+ ((isInsSubReg || isSubRegToReg) && Swapped)) {
+ ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CrossRC)
+ ++numCrossRCs;
+
+ // This may happen even if it's cross-rc coalescing. e.g.
+ // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
+ // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have
+ // to be allocated a register from GR64_ABCD.
+ if (NewRC)
+ mri_->setRegClass(DstReg, NewRC);
+
+ // Remember to delete the copy instruction.
+ JoinedCopies.insert(CopyMI);
+
+ // Some live range has been lengthened due to coalescing, eliminate the
+ // unnecessary kills.
+ RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
+ UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+
+ // If we have extended the live range of a physical register, make sure we
+ // update live-in lists as well.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ const LiveInterval &VRegInterval = li_->getInterval(SrcReg);
+ SmallVector<MachineBasicBlock*, 16> BlockSeq;
+ for (LiveInterval::const_iterator I = VRegInterval.begin(),
+ E = VRegInterval.end(); I != E; ++I ) {
+ li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+ for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+ MachineBasicBlock &block = *BlockSeq[idx];
+ if (!block.isLiveIn(DstReg))
+ block.addLiveIn(DstReg);
+ }
+ BlockSeq.clear();
+ }
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ li_->removeInterval(SrcReg);
+
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
+
+ // Manually delete the live interval copy.
+ if (SavedLI) {
+ SavedLI->clear();
+ delete SavedLI;
+ }
+
+ // If the resulting interval has a preference that no longer fits because of
+ // subreg coalescing, just clear the preference.
+ unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
+ if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
+ const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
+ if (!RC->contains(Preference))
+ mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
+ }
+
+ DEBUG({
+ dbgs() << "\n\t\tJoined. Result = ";
+ ResDstInt->print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ ++numJoins;
+ return true;
+}
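+
+// Editorial summary of the decision sequence in JoinCopy above:
+// 1. Classify the copy (plain move, extract_subreg, insert_subreg,
+// subreg_to_reg) and determine SrcReg/DstReg plus sub-register indices.
+// 2. Reject outright: already-joined copies, physreg-physreg pairs,
+// unallocatable physregs, and incompatible register classes.
+// 3. Apply profitability heuristics (use density, loop structure,
+// cross-class sizes); failures here set Again for a later retry.
+// 4. Try JoinIntervals; on interference, fall back to rematerializing the
+// def (ReMaterializeTrivialDef) or eliminating the copy without merging
+// (AdjustCopiesBackFrom, RemoveCopyByCommutingDef).
+// 5. On success, merge sub-register/alias intervals, rewrite all defs and
+// uses (UpdateRegDefsUses), clean up kills, live-in lists and allocation
+// hints, and remove the source interval.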
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. NewVNInfo keeps
+/// track of the new value number assignments for the result
+/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
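+
+// Editorial sketch (not part of this patch) of the resolution scheme above,
+// on plain integers: each side has value numbers 0..N-1; CopyFrom[side][vn]
+// is the vn on the other side it is a copy of, or -1. Assignments start at
+// -1 (unassigned); -2 marks a resolution in flight, mirroring the sentinel
+// used above (a true copy cycle is not expected, per the commented-out
+// assert). All names are hypothetical.
+static int toyUltimateVN(int Side, int VN, std::vector<int> CopyFrom[2],
+ std::vector<int> Assign[2], int &NextResultVN) {
+ if (Assign[Side][VN] >= 0)
+ return Assign[Side][VN]; // already resolved
+ int OtherVN = CopyFrom[Side][VN];
+ if (OtherVN < 0) // not a copy: gets a fresh result value number
+ return Assign[Side][VN] = NextResultVN++;
+ if (Assign[1 - Side][OtherVN] >= 0) // source already resolved: reuse it
+ return Assign[Side][VN] = Assign[1 - Side][OtherVN];
+ Assign[Side][VN] = -2; // mark in progress, then resolve the source
+ return Assign[Side][VN] =
+ toyUltimateVN(1 - Side, OtherVN, CopyFrom, Assign, NextResultVN);
+}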
+
+static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
+ return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ // isMoveInstr has filled in SrcReg and DstReg; nothing more to do here.
+ } else if (MI->isExtractSubreg()) {
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ } else if (MI->isSubregToReg() ||
+ MI->isInsertSubreg()) {
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(2).getReg();
+ } else
+ return false;
+ return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) &&
+ (DstReg == DR || TRI->isSuperRegister(DR, DstReg));
+}
+
+/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+/// the specified live interval is defined by a copy from the specified
+/// register.
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
+ LiveRange *LR,
+ unsigned Reg) {
+ unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
+ if (SrcReg == Reg)
+ return true;
+ // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
+ if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
+ TargetRegisterInfo::isPhysicalRegister(li.reg) &&
+ *tri_->getSuperRegisters(li.reg)) {
+ // It's a sub-register live interval, we may not have precise information.
+ // Re-compute it.
+ MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
+ if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) {
+ // Cache computed info.
+ LR->valno->def = LR->start;
+ LR->valno->setCopy(DefMI);
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/// ValueLiveAt - Return true if the LiveRange pointed to by the given
+/// iterator, or any subsequent range with the same value number,
+/// is live at the given point.
+bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr,
+ LiveInterval::iterator LREnd,
+ SlotIndex defPoint) const {
+ for (const VNInfo *valno = LRItr->valno;
+ (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) {
+ if (LRItr->contains(defPoint))
+ return true;
+ }
+
+ return false;
+}
+
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
+ assert(RHS.containsOneValue());
+
+ // One or more value numbers in the current interval may be defined as
+ // copies from the RHS. Scan the overlapping portions of the LHS and RHS,
+ // keeping track of this and looking for overlapping live ranges that are
+ // NOT defined as copies. If these exist, we cannot coalesce.
+
+ LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+ LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
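+ // Skip ahead until the first place of potential sharing.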
+ if (LHSIt->start < RHSIt->start) {
+ LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+ if (LHSIt != LHS.begin()) --LHSIt;
+ } else if (RHSIt->start < LHSIt->start) {
+ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+ if (RHSIt != RHS.begin()) --RHSIt;
+ }
+
+ SmallVector<VNInfo*, 8> EliminatedLHSVals;
+
+ while (1) {
+ // Determine if these live intervals overlap.
+ bool Overlaps = false;
+ if (LHSIt->start <= RHSIt->start)
+ Overlaps = LHSIt->end > RHSIt->start;
+ else
+ Overlaps = RHSIt->end > LHSIt->start;
+
+ // If the live intervals overlap, there are two interesting cases: if the
+ // LHS interval is defined by a copy from the RHS, it's ok and we record
+ // that the LHS value # is the same as the RHS. If it's not, then we cannot
+ // coalesce these live ranges and we bail out.
+ if (Overlaps) {
+ // If we haven't already recorded that this value # is safe, check it.
+ if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (LHSIt->valno->hasRedefByEC())
+ return false;
+ // Copy from the RHS?
+ if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
+ return false; // Nope, bail out.
+
+ if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+ // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+ }
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ continue;
+ }
+
+ if (LHSIt->end < RHSIt->end) {
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // One interesting case to check here. It's possible that we have
+ // something like "X3 = Y" which defines a new value number in the LHS,
+ // and is the last use of this live range of the RHS. In this case, we
+ // want to notice this copy (so that it gets coalesced away) even though
+ // the live ranges don't actually overlap.
+ if (LHSIt->start == RHSIt->end) {
+ if (InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // We already know that this value number is going to be merged in
+ // if coalescing succeeds. Just skip the live range.
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (LHSIt->valno->hasRedefByEC())
+ return false;
+ // Otherwise, if this is a copy from the RHS, mark it as being merged
+ // in.
+ if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
+ if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+ // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ }
+ }
+ }
+
+ if (++RHSIt == RHSEnd) break;
+ }
+ }
+
+ // If we got here, we know that the coalescing will be successful and that
+ // the value numbers in EliminatedLHSVals will all be merged together. Since
+ // the most common case is that EliminatedLHSVals has a single number, we
+ // optimize for it: if there is more than one value, we merge them all into
+ // the lowest numbered one, then handle the interval as if we were merging
+ // with one value number.
+ VNInfo *LHSValNo = NULL;
+ if (EliminatedLHSVals.size() > 1) {
+ // Loop through all the equal value numbers merging them into the smallest
+ // one.
+ VNInfo *Smallest = EliminatedLHSVals[0];
+ for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
+ if (EliminatedLHSVals[i]->id < Smallest->id) {
+ // Merge the current notion of the smallest into the smaller one.
+ LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
+ Smallest = EliminatedLHSVals[i];
+ } else {
+ // Merge into the smallest.
+ LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
+ }
+ }
+ LHSValNo = Smallest;
+ } else if (EliminatedLHSVals.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSuperRegisters(LHS.reg))
+ // Imprecise sub-register information. Can't handle it.
+ return false;
+ llvm_unreachable("No copies from the RHS?");
+ } else {
+ LHSValNo = EliminatedLHSVals[0];
+ }
+
+ // Okay, now that there is a single LHS value number that we're merging the
+ // RHS into, update the value number info for the LHS to indicate that the
+ // value number is defined where the RHS value number was.
+ const VNInfo *VNI = RHS.getValNumInfo(0);
+ LHSValNo->def = VNI->def;
+ LHSValNo->setCopy(VNI->getCopy());
+
+ // Okay, the final step is to loop over the RHS live intervals, adding them to
+ // the LHS.
+ if (VNI->hasPHIKill())
+ LHSValNo->setHasPHIKill(true);
+ LHS.addKills(LHSValNo, VNI->kills);
+ LHS.MergeRangesInAsValue(RHS, LHSValNo);
+
+ LHS.ComputeJoinedWeight(RHS);
+
+ // Update regalloc hint if both are virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
+ TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
+ std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
+ std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
+ if (RHSPref != LHSPref)
+ mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second);
+ }
+
+ // Update the liveintervals of sub-registers.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
+ for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS,
+ li_->getVNInfoAllocator());
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false. Otherwise, if one of the intervals being joined is a
+/// physreg, this method always canonicalizes LHS to be it. The output
+/// "RHS" will not have been modified, so we can use this information
+/// below to update aliases.
+bool
+SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
+ bool &Swapped) {
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSubRegisters(LHS.reg)) {
+ // If it's coalescing a virtual register to a physical register, estimate
+ // its live interval length. This is the *cost* of scanning an entire live
+ // interval. If the cost is low, we'll do an exhaustive check instead.
+
+ // If this is something like this:
+ // BB1:
+ // v1024 = op
+ // ...
+ // BB2:
+ // ...
+ // RAX = v1024
+ //
+ // That is, the live interval of v1024 crosses a bb. Then we can't rely on
+ // the less conservative check. It's possible a sub-register is defined
+ // before v1024 (or is live in) and live out of BB1.
+ if (RHS.containsOneValue() &&
+ li_->intervalIsInOneMBB(RHS) &&
+ li_->getApproximateInstructionCount(RHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DEBUG({
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
+ *tri_->getSubRegisters(RHS.reg)) {
+ if (LHS.containsOneValue() &&
+ li_->getApproximateInstructionCount(LHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+ DEBUG({
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+ }
+
+ // Compute ultimate value numbers for the LHS and RHS values.
+ if (RHS.containsOneValue()) {
+ // Copies from a live interval with a single value are simple to handle and
+ // very common, handle the special case here. This is important, because
+ // often RHS is small and LHS is large (e.g. a physreg).
+
+ // Find out if the RHS is defined as a copy from some value in the LHS.
+ int RHSVal0DefinedFromLHS = -1;
+ int RHSValID = -1;
+ VNInfo *RHSValNoInfo = NULL;
+ VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
+ unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
+ if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
+ // If RHS is not defined as a copy from the LHS, we can use simpler and
+ // faster checks to see if the live ranges are coalescable. This joiner
+ // can't swap the LHS/RHS intervals though.
+ if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ return SimpleJoin(LHS, RHS);
+ } else {
+ RHSValNoInfo = RHSValNoInfo0;
+ }
+ } else {
+ // It was defined as a copy from the LHS, find out what value # it is.
+ RHSValNoInfo =
+ LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno;
+ RHSValID = RHSValNoInfo->id;
+ RHSVal0DefinedFromLHS = RHSValID;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.resize(LHS.getNumValNums(), NULL);
+
+ // Okay, *all* of the values in LHS that are defined as a copy from RHS
+ // should now get updated.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
+ if (LHSSrcReg != RHS.reg) {
+ // If this is not a copy from the RHS, its value number will be
+ // unmodified by the coalescing.
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ } else if (RHSValID == -1) {
+ // Otherwise, it is a copy from the RHS, and we don't already have a
+ // value# for it. Keep the current value number, but remember it.
+ LHSValNoAssignments[VN] = RHSValID = VN;
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ } else {
+ // Otherwise, use the specified value #.
+ LHSValNoAssignments[VN] = RHSValID;
+ if (VN == (unsigned)RHSValID) { // Else this val# is dead.
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ }
+ }
+ } else {
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ }
+ }
+
+ assert(RHSValID != -1 && "Didn't find value #?");
+ RHSValNoAssignments[0] = RHSValID;
+ if (RHSVal0DefinedFromLHS != -1) {
+ // This path doesn't go through ComputeUltimateVN so just set
+ // it to anything.
+ RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1;
+ }
+ } else {
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
+ continue;
+
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ assert(lr && "Cannot find live range");
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
+ continue;
+
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ assert(lr && "Cannot find live range");
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+
+ while (1) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result live range, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
+ }
+
+ if (I->end < J->end) {
+ ++I;
+ if (I == IE) break;
+ } else {
+ ++J;
+ if (J == JE) break;
+ }
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ NewVNInfo[LHSValID]->removeKill(VNI->def);
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
+ RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ NewVNInfo[RHSValID]->removeKill(VNI->def);
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
+ LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
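+ // Join the interval with fewer live ranges into the one with more, and
+ // always make a physical register interval the destination.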
+ if ((RHS.ranges.size() > LHS.ranges.size() &&
+ TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
+ TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
+ mri_);
+ Swapped = true;
+ } else {
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
+ Swapped = false;
+ }
+ return true;
+}
+
+namespace {
+ // DepthMBBCompare - Comparison predicate that sorts first on the loop
+ // depth of the basic block (the unsigned), then on CFG connectivity, and
+ // finally on the MBB number.
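+ // For example, every block at loop depth 2 sorts before any block at
+ // depth 1, regardless of CFG connectivity.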
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ std::vector<CopyRec> VirtCopies;
+ std::vector<CopyRec> PhysCopies;
+ std::vector<CopyRec> ImpDefCopies;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+ // If this isn't a copy, extract_subreg, insert_subreg, or subreg_to_reg,
+ // we can't join intervals.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ bool isInsUndef = false;
+ if (Inst->isExtractSubreg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->isInsertSubreg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ if (Inst->getOperand(1).isUndef())
+ isInsUndef = true;
+ } else if (Inst->isSubregToReg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (isInsUndef ||
+ (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()))
+ ImpDefCopies.push_back(CopyRec(Inst, 0));
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(CopyRec(Inst, 0));
+ else
+ VirtCopies.push_back(CopyRec(Inst, 0));
+ }
+
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+ std::vector<CopyRec> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+ // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ CopyRec &TheCopy = TryAgainList[i];
+ if (!TheCopy.MI)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy.MI = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes. The registers may be either phys or virt regs.
+bool
+SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+ unsigned RegB) const {
+ // Get the register classes for the first reg.
+ if (TargetRegisterInfo::isPhysicalRegister(RegA)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "Shouldn't consider two physregs!");
+ return !mri_->getRegClass(RegB)->contains(RegA);
+ }
+
+ // Compare against the regclass for the second reg.
+ const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA);
+ if (TargetRegisterInfo::isVirtualRegister(RegB)) {
+ const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB);
+ return RegClassA != RegClassB;
+ }
+ return !RegClassA->contains(RegB);
+}
+
+/// lastRegisterUse - Returns the last (non-debug) use of the specified register
+/// between cycles Start and End or NULL if there are no uses.
+MachineOperand *
+SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
+ SlotIndex End,
+ unsigned Reg,
+ SlotIndex &UseIdx) const {
+ UseIdx = SlotIndex();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineOperand *LastUse = NULL;
+ for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg),
+ E = mri_->use_nodbg_end(); I != E; ++I) {
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg)
+ // Ignore identity copies.
+ continue;
+ SlotIndex Idx = li_->getInstructionIndex(UseMI);
+ // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
+ // that compares higher than any other interval.
+ if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+ LastUse = &Use;
+ UseIdx = Idx.getUseIndex();
+ }
+ }
+ return LastUse;
+ }
+
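+ // For a physical register, scan instructions backward from End toward
+ // Start and return the first (i.e. latest) non-identity-copy use found.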
+ SlotIndex s = Start;
+ SlotIndex e = End.getPrevSlot().getBaseIndex();
+ while (e >= s) {
+ // Skip deleted instructions
+ MachineInstr *MI = li_->getInstructionFromIndex(e);
+ while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) {
+ e = e.getPrevIndex();
+ MI = li_->getInstructionFromIndex(e);
+ }
+ if (e < s || MI == NULL)
+ return NULL;
+
+ // Ignore identity copies.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg))
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ MachineOperand &Use = MI->getOperand(i);
+ if (Use.isReg() && Use.isUse() && Use.getReg() &&
+ tri_->regsOverlap(Use.getReg(), Reg)) {
+ UseIdx = e.getUseIndex();
+ return &Use;
+ }
+ }
+
+ e = e.getPrevIndex();
+ }
+
+ return NULL;
+}
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ dbgs() << tri_->getName(reg);
+ else
+ dbgs() << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+ E = tri_->regclass_end(); I != E; ++I)
+ allocatableRCRegs_.insert(std::make_pair(*I,
+ tri_->getAllocatableSet(fn, *I)));
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ I != E; ++I){
+ I->second->print(dbgs(), tri_);
+ dbgs() << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ bool DoDelete = true;
+ if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert((MI->isExtractSubreg() || MI->isInsertSubreg() ||
+ MI->isSubregToReg()) && "Unrecognized copy instruction");
+ DstReg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+ }
+ if (MI->registerDefIsDead(DstReg)) {
+ LiveInterval &li = li_->getInterval(DstReg);
+ if (!ShortenDeadCopySrcLiveRange(li, MI))
+ ShortenDeadCopyLiveRange(li, MI);
+ DoDelete = true;
+ }
+ if (!DoDelete)
+ mii = llvm::next(mii);
+ else {
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ }
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+ // If the move will be an identity move, delete it.
+ bool isMove = tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ if (isMove && SrcReg == DstReg) {
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &RegInt = li_->getInterval(SrcReg);
+ // If def of this move instruction is dead, remove its live range
+ // from the destination register's live interval.
+ if (MI->registerDefIsDead(DstReg)) {
+ if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
+ ShortenDeadCopyLiveRange(RegInt, MI);
+ }
+ }
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ } else {
+ ++mii;
+ }
+ }
+ }
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+ return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 0000000..f668064
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,252 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class SimpleRegisterCoalescing;
+ class LiveVariables;
+ class TargetRegisterInfo;
+ class TargetInstrInfo;
+ class VirtRegMap;
+ class MachineLoopInfo;
+
+ /// CopyRec - Representation for copy instructions in coalescer queue.
+ ///
+ struct CopyRec {
+ MachineInstr *MI;
+ unsigned LoopDepth;
+ CopyRec(MachineInstr *mi, unsigned depth)
+ : MI(mi), LoopDepth(depth) {}
+ };
+
+ class SimpleRegisterCoalescing : public MachineFunctionPass,
+ public RegisterCoalescer {
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ LiveIntervals *li_;
+ const MachineLoopInfo* loopInfo;
+ AliasAnalysis *AA;
+
+ BitVector allocatableRegs_;
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+
+ struct InstrSlots {
+ enum {
+ LOAD = 0,
+ USE = 1,
+ DEF = 2,
+ STORE = 3,
+ NUM = 4
+ };
+ };
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+ // This runs as an independent pass, so don't do anything.
+ return false;
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ private:
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns true
+ /// if the copy was successfully coalesced away. If it is not currently
+ /// possible to coalesce this interval, but it may be possible if other
+ /// things get coalesced, then it returns true by reference in 'Again'.
+ bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. Otherwise, if one of the intervals being joined is a
+ /// physreg, this method always canonicalizes LHS to be it. The output
+ /// "RHS" will not have been modified, so we can use this information
+ /// below to update aliases.
+ bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
+
+ /// SimpleJoin - Attempt to join the specified interval into this one. The
+ /// caller of this method must guarantee that the RHS only contains a single
+ /// value number and that the RHS is not defined by a copy from this
+ /// interval. This returns false if the intervals are not joinable, or it
+ /// joins them and returns true.
+ bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
+
+ /// Return true if the two specified registers belong to different register
+ /// classes. The registers may be either phys or virt regs.
+ bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg,
+ /// see if we can merge these two destination reg valno# into a single
+ /// value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+ /// block as the copy instruction, trim the live interval to the last use
+ /// and return true.
+ bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li, const LiveRange *LR);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+ /// computation, replace the copy by rematerializing the definition.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+ unsigned DstSubIdx, MachineInstr *CopyMI);
+
+ /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+ /// from an implicit def to another register can be coalesced away.
+ bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li, LiveInterval &ImpLi) const;
+
+ /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
+ /// implicit_def and it is being removed. Turn all copies from this value#
+ /// into implicit_defs.
+ void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
+
+ /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+ /// virtual destination register with a physical source register.
+ bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+ /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+ /// copy from a virtual source register to a physical destination register.
+ bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
+ unsigned Threshold);
+
+ /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+ /// register with a physical register, check if any of the virtual register
+ /// operands is a sub-register use or def. If so, make sure it won't result
+ /// in an illegal extract_subreg or insert_subreg instruction.
+ bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an extract_subreg where dst is a physical register, e.g.
+ /// cl = EXTRACT_SUBREG reg1024, 1
+ bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an insert_subreg where src is a physical register, e.g.
+ /// reg1024 = INSERT_SUBREG reg1024, c1, 0
+ bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// ValueLiveAt - Return true if the LiveRange pointed to by the given
+ /// iterator, or any subsequent range with the same value number,
+ /// is live at the given point.
+ bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd,
+ SlotIndex defPoint) const;
+
+ /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+ /// the specified live interval is defined by a copy from the specified
+ /// register.
+ bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
+ unsigned Reg);
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+ /// due to live range lengthening as the result of coalescing.
+ void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
+
+ /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+ /// Return true if live interval is removed.
+ bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+ /// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+ /// extended by a dead copy. Mark the last use (if any) of the val# as kill
+ /// and end the live range there. If there isn't another use, then this
+ /// live range is dead. Return true if live interval is removed.
+ bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+ /// lastRegisterUse - Returns the last use of the specified register between
+ /// cycles Start and End or NULL if there are no uses.
+ MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
+ unsigned Reg, SlotIndex &LastUseIdx) const;
+
+ void printRegName(unsigned reg) const;
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 0000000..8d4d1b2
--- /dev/null
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,519 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPass : public FunctionPass {
+
+ const TargetLowering *TLI;
+
+ const Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *ResumeFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *SelectorFn;
+ Constant *ExceptionFn;
+ Constant *CallSiteFn;
+
+ Value *CallSite;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ : FunctionPass(&ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch);
+ void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ bool insertSjLjEHSupport(Function &F);
+ };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public Interface To the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+ return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
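+// The layout below is assumed to match struct SjLj_Function_Context in
+// the libgcc SjLj unwinding runtime (unwind-sjlj.c).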
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(M.getContext());
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(M.getContext(),
+ VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ ResumeFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Resume",
+ Type::getVoidTy(M.getContext()),
+ VoidPtrTy,
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+ ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ PersonalityFn = 0;
+
+ return true;
+}
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // The runtime comes back to the dispatcher with the call_site - 1 in
+ // the context. Odd, but there it is.
+ ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo - 1);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke
+ new StoreInst(CallSiteNoC, CallSite, true, II); // volatile
+ CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ // We still want this to look like an invoke so that the LSDA is emitted
+ // properly; we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesLiveAcrossInvokes - Each value that is live across an
+/// unwind edge we spill into a stack location, guaranteeing that there is
+/// nothing live across the unwind edge. This process also splits all
+/// critical edges coming out of invokes.
+void SjLjEHPass::
+splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
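+ // i.e. for each argument %a we insert "%a.tmp = bitcast T %a to T" and
+ // rewrite all other uses of %a to use %a.tmp instead.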
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ // This is always a no-op cast because we're casting AI to AI->getType() so
+ // src and destination types are identical. BitCast is the only possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Normally it is forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with to simply
+ // make NC its user.
+ NC->setOperand(0, AI);
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ // Look through the terminators of the basic blocks to find invokes, returns
+ // and unwinds
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+ // If we don't have any invokes or unwinds, there's nothing to do.
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ // Find the eh.selector.* and eh.exception calls. We'll use the first
+ // eh.selector to determine the right personality function to use. For
+ // SJLJ, we always use the same personality for the whole function,
+ // not on a per-selector basis.
+ // FIXME: That's a bit ugly. Better way?
+ SmallVector<CallInst*,16> EH_Selectors;
+ SmallVector<CallInst*,16> EH_Exceptions;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() == SelectorFn) {
+ if (!PersonalityFn) PersonalityFn = CI->getOperand(2);
+ EH_Selectors.push_back(CI);
+ } else if (CI->getCalledFunction() == ExceptionFn) {
+ EH_Exceptions.push_back(CI);
+ }
+ }
+ }
+ }
+ // If we don't have any eh.selector calls, we can't determine the personality
+ // function. Without a personality function, we can't process exceptions.
+ if (!PersonalityFn) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ if (!Invokes.empty()) {
+ // We have invokes, so we need to add register/unregister calls to get
+ // this function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a
+ // reference to the selector value returned in the function
+ // context. We leave the selector itself so the EH analysis later
+ // can use it.
+ for (unsigned i = 0, e = EH_Selectors.size(); i != e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
+ // eh.exception calls are replaced with references to the proper
+ // location in the context. Unlike eh.selector, the eh.exception
+ // calls are removed entirely.
+ for (unsigned i = 0, e = EH_Exceptions.size(); i != e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure
+ // the instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
+
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context
+ // and continue.
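+ // The rewritten entry block ends roughly like this (sketch):
+ // %dispatch = call i32 @llvm.eh.sjlj.setjmp(i8* %jbuf)
+ // %notunwind = icmp eq i32 %dispatch, 0
+ // br i1 %notunwind, label %eh.sjlj.setjmp.cont, label %eh.sjlj.setjmp.catch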
+
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBlock =
+ BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *FieldPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *ElemPtr =
+ GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator());
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, FieldPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+  // Check the return value of the setjmp. Non-zero goes to the dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions
+ // to mark their call_site values, and fill in the dispatch switch
+ // accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+  // The front end has likely added calls to _Unwind_Resume. We need
+  // to find those calls and mark the call_site as -1 immediately prior.
+  // _Unwind_Resume is a noreturn function, so any block that calls it
+  // should end in an 'unreachable' instruction with the call immediately
+  // prior. That's how we'll search.
+  // ??? There's got to be a better way. This is fugly.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) {
+ BasicBlock::iterator I = BB->getTerminator();
+ // Check the previous instruction and see if it's a resume call
+ if (I == BB->begin()) continue;
+ if (CallInst *CI = dyn_cast<CallInst>(--I)) {
+ if (CI->getCalledFunction() == ResumeFn) {
+ Value *NegativeOne = Constant::getAllOnesValue(Int32Ty);
+ new StoreInst(NegativeOne, CallSite, true, I); // volatile
+ }
+ }
+ }
+
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+ }
+
+ return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+ bool Res = insertSjLjEHSupport(F);
+ return Res;
+}
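
The overall shape of the transformation is easier to see in plain C++: the function context's jmpbuf plus a volatile call-site number reproduce the dispatch scheme by hand. The sketch below is only an illustration of the idea (all names are invented), not the IR the pass actually emits:

  #include <csetjmp>
  #include <cstdio>
  #include <cstdlib>

  static std::jmp_buf context;        // stand-in for the jmpbuf in the function context
  static volatile int call_site = 0;  // records which "invoke" is active

  static void may_throw() { std::longjmp(context, 1); }  // unwinder re-entry

  int main() {
    if (setjmp(context) == 0) {
      // Normal path: the context is registered, the call site is recorded,
      // and the potentially-throwing call is made.
      call_site = 1;
      may_throw();
    } else {
      // Dispatch path: switch on the recorded call site, defaulting to
      // "keep unwinding", just as the DispatchSwitch above does.
      switch (call_site) {
      case 1:  std::puts("landing pad for invoke #1"); return 0;
      default: std::puts("no handler here"); std::abort();
      }
    }
    return 0;
  }
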
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 0000000..6110ef5
--- /dev/null
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,226 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+
+// Yep - these are thread safe. See the header for details.
+namespace {
+
+
+ class EmptyIndexListEntry : public IndexListEntry {
+ public:
+ EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
+ };
+
+ class TombstoneIndexListEntry : public IndexListEntry {
+ public:
+ TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
+ };
+
+ // The following statics are thread safe. They're read only, and you
+ // can't step from them to any other list entries.
+ ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
+ ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
+}
+
+char SlotIndexes::ID = 0;
+static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+
+IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
+ return &*IndexListEntryEmptyKey;
+}
+
+IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
+ return &*IndexListEntryTombstoneKey;
+}
+
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ mbb2IdxMap.clear();
+ idx2MBBMap.clear();
+ terminatorGaps.clear();
+ clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+  // At each iteration assert that the instruction pointed to in the index
+  // is the same one pointed to by the MI iterator.
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+ initList();
+
+  // Check that the list contains only the sentinel.
+ assert(indexListHead->getNext() == 0 &&
+ "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(mbb2IdxMap.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ functionSize = 0;
+ unsigned index = 0;
+
+ push_back(createEntry(0, index));
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+
+ index += SlotIndex::NUM;
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = miItr;
+ if (mi->isDebugValue())
+ continue;
+
+ if (miItr == mbb->getFirstTerminator()) {
+ push_back(createEntry(0, index));
+ terminatorGaps.insert(
+ std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
+ index += SlotIndex::NUM;
+ }
+
+ // Insert a store index for the instr.
+ push_back(createEntry(mi, index));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(
+ std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+
+ ++functionSize;
+
+ unsigned Slots = mi->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * SlotIndex::NUM;
+ }
+
+ if (mbb->getFirstTerminator() == mbb->end()) {
+ push_back(createEntry(0, index));
+ terminatorGaps.insert(
+ std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
+ index += SlotIndex::NUM;
+ }
+
+ // One blank instruction at the end.
+ push_back(createEntry(0, index));
+
+ SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
+ mbb2IdxMap.insert(
+ std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
+
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(dump());
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+
+ // Renumber updates the index of every element of the index list.
+ // If all instrs in the function have been allocated an index (which has been
+ // placed in the index list in the order of instruction iteration) then the
+ // resulting numbering will match what would have been generated by the
+ // pass during the initial numbering of the function if the new instructions
+ // had been present.
+
+ functionSize = 0;
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
+
+ curEntry->setIndex(index);
+
+ if (curEntry->getInstr() == 0) {
+      // MBB start entry or terminator gap. Just step the index by one slot group.
+ index += SlotIndex::NUM;
+ }
+ else {
+ ++functionSize;
+ unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * SlotIndex::NUM;
+ }
+ }
+}
+
+void SlotIndexes::dump() const {
+ for (const IndexListEntry *itr = front(); itr != getTail();
+ itr = itr->getNext()) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
+ itr != mbb2IdxMap.end(); ++itr) {
+ dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
+ << itr->second.first << ", " << itr->second.second << "]\n";
+ }
+}
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ os << getIndex();
+ if (isPHI())
+ os << "*";
+}
+
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
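
The spacing rule in runOnMachineFunction and renumberIndexes is the same: every instruction reserves (numDefs + 1) * SlotIndex::NUM index values so there is room to slot new instructions in later. Here is a standalone toy of just that arithmetic (the def counts are invented):

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned NUM = 4;         // assumed sub-slots per index
    std::vector<unsigned> numDefs;  // defs per instruction (made up)
    numDefs.push_back(1);
    numDefs.push_back(2);
    numDefs.push_back(0);

    unsigned index = 0;
    for (unsigned i = 0; i < numDefs.size(); ++i) {
      std::printf("instr %u -> base index %u\n", i, index);
      unsigned slots = numDefs[i] ? numDefs[i] : 1;  // at least one def slot
      index += (slots + 1) * NUM;                    // leave a gap for insertions
    }
    return 0;
  }
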
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
new file mode 100644
index 0000000..7ba4403
--- /dev/null
+++ b/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,527 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ enum SpillerName { trivial, standard, splitting };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+ cl::desc("Spiller to use: (default: standard)"),
+ cl::Prefix,
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumVal(standard, "default spiller"),
+ clEnumVal(splitting, "splitting spiller"),
+ clEnumValEnd),
+ cl::init(standard));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ VirtRegMap *vrm;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : mf(mf), lis(lis), vrm(vrm)
+ {
+ mfi = mf->getFrameInfo();
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ }
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding or
+ /// remat is attempted.
+ std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!li->isStackSlot() &&
+ "Trying to spill a stack slot.");
+
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+ std::vector<LiveInterval*> added;
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ // Iterate over reg uses/defs.
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ // Grab the use/def instr.
+ MachineInstr *mi = &*regItr;
+
+ DEBUG(dbgs() << " Processing " << *mi);
+
+ // Step regItr to the next use/def instr.
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ // Collect uses & defs for this instr.
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+ indices.push_back(i);
+ }
+
+ // Create a new vreg & interval for this instr.
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ vrm->assignVirt2StackSlot(newVReg, ss);
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ newLI->weight = HUGE_VALF;
+
+ // Update the reg operands & kill flags.
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ unsigned mopIdx = indices[i];
+ MachineOperand &mop = mi->getOperand(mopIdx);
+ mop.setReg(newVReg);
+ if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+ mop.setIsKill(true);
+ }
+ }
+ assert(hasUse || hasDef);
+
+ // Insert reload if necessary.
+ MachineBasicBlock::iterator miItr(mi);
+ if (hasUse) {
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc);
+ MachineInstr *loadInstr(prior(miItr));
+ SlotIndex loadIndex =
+ lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+ SlotIndex endIndex = loadIndex.getNextIndex();
+ VNInfo *loadVNI =
+ newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+ loadVNI->addKill(endIndex);
+ newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ }
+
+ // Insert store if necessary.
+ if (hasDef) {
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+                                 true, ss, trc);
+ MachineInstr *storeInstr(llvm::next(miItr));
+ SlotIndex storeIndex =
+ lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+ SlotIndex beginIndex = storeIndex.getPrevIndex();
+ VNInfo *storeVNI =
+ newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+ storeVNI->addKill(storeIndex);
+ newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ }
+
+ added.push_back(newLI);
+ }
+
+ return added;
+ }
+
+};
+
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : SpillerBase(mf, lis, vrm) {}
+
+ std::vector<LiveInterval*> spill(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
+ // Ignore spillIs - we don't use it.
+ return trivialSpillEverywhere(li);
+ }
+
+};
+
+/// Falls back on LiveIntervals::addIntervalsForSpills.
+class StandardSpiller : public Spiller {
+protected:
+ LiveIntervals *lis;
+ const MachineLoopInfo *loopInfo;
+ VirtRegMap *vrm;
+public:
+ StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo,
+ VirtRegMap *vrm)
+ : lis(lis), loopInfo(loopInfo), vrm(vrm) {}
+
+ /// Falls back on LiveIntervals::addIntervalsForSpills.
+ std::vector<LiveInterval*> spill(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
+ return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+ }
+
+};
+
+/// When a call to spill is placed this spiller will first try to break the
+/// interval up into its component values (one new interval per value).
+/// If this fails, or if a call is placed to spill a previously split interval
+/// then the spiller falls back on the standard spilling mechanism.
+class SplittingSpiller : public StandardSpiller {
+public:
+ SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+ : StandardSpiller(lis, loopInfo, vrm) {
+
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ }
+
+ std::vector<LiveInterval*> spill(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestStart) {
+
+ if (worthTryingToSplit(li)) {
+ return tryVNISplit(li, earliestStart);
+ }
+ // else
+ return StandardSpiller::spill(li, spillIs, earliestStart);
+ }
+
+private:
+
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+ DenseSet<LiveInterval*> alreadySplit;
+
+ bool worthTryingToSplit(LiveInterval *li) const {
+ return (!alreadySplit.count(li) && li->getNumValNums() > 1);
+ }
+
+ /// Try to break a LiveInterval into its component values.
+ std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
+ SlotIndex *earliestStart) {
+
+ DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
+
+ std::vector<LiveInterval*> added;
+ SmallVector<VNInfo*, 4> vnis;
+
+ std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
+
+ for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
+ vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
+ VNInfo *vni = *vniItr;
+
+ // Skip unused VNIs, or VNIs with no kills.
+ if (vni->isUnused() || vni->kills.empty())
+ continue;
+
+ DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
+ LiveInterval *splitInterval = extractVNI(li, vni);
+
+ if (splitInterval != 0) {
+ DEBUG(dbgs() << *splitInterval << "\n");
+ added.push_back(splitInterval);
+ alreadySplit.insert(splitInterval);
+ if (earliestStart != 0) {
+ if (splitInterval->beginIndex() < *earliestStart)
+ *earliestStart = splitInterval->beginIndex();
+ }
+ } else {
+ DEBUG(dbgs() << "0\n");
+ }
+ }
+
+ DEBUG(dbgs() << "Original LI: " << *li << "\n");
+
+    // If the original interval still contains some live ranges,
+    // add it to added and alreadySplit.
+ if (!li->empty()) {
+ added.push_back(li);
+ alreadySplit.insert(li);
+ if (earliestStart != 0) {
+ if (li->beginIndex() < *earliestStart)
+ *earliestStart = li->beginIndex();
+ }
+ }
+
+ return added;
+ }
+
+ /// Extract the given value number from the interval.
+ LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
+ assert(vni->isDefAccurate() || vni->isPHIDef());
+ assert(!vni->kills.empty());
+
+ // Create a new vreg and live interval, copy VNI kills & ranges over.
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
+
+ // Start by copying all live ranges in the VN to the new interval.
+ for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
+ rItr != rEnd; ++rItr) {
+ if (rItr->valno == vni) {
+ newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
+ }
+ }
+
+ // Erase the old VNI & ranges.
+ li->removeValNo(vni);
+
+ // Collect all current uses of the register belonging to the given VNI.
+ // We'll use this to rename the register after we've dealt with the def.
+ std::set<MachineInstr*> uses;
+ for (MachineRegisterInfo::use_iterator
+ useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
+ useItr != useEnd; ++useItr) {
+ uses.insert(&*useItr);
+ }
+
+ // Process the def instruction for this VNI.
+ if (newVNI->isPHIDef()) {
+      // Insert a copy at the start of the MBB. The range preceding the
+      // copy will be attached to the original LiveInterval.
+ MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
+ tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc);
+ MachineInstr *copyMI = defMBB->begin();
+ copyMI->addRegisterKilled(li->reg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+ VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
+ 0, false, lis->getVNInfoAllocator());
+ phiDefVNI->setIsPHIDef(true);
+ phiDefVNI->addKill(copyIdx.getDefIndex());
+ li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
+ LiveRange *oldPHIDefRange =
+ newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
+
+      // If the old phi def starts in the middle of the range, chop it up.
+ if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
+ LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
+ oldPHIDefRange->valno);
+ oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
+ newLI->addRange(oldPHIDefRange2);
+ } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
+ // Otherwise if it's at the start of the range just trim it.
+ oldPHIDefRange->start = copyIdx.getDefIndex();
+ } else {
+ assert(false && "PHI def range doesn't cover PHI def?");
+ }
+
+ newVNI->def = copyIdx.getDefIndex();
+ newVNI->setCopy(copyMI);
+ newVNI->setIsPHIDef(false); // not a PHI def anymore.
+ newVNI->setIsDefAccurate(true);
+ } else {
+      // Non-PHI def. Rename the def. If it's two-addr, that means renaming
+      // the use and inserting a new copy too.
+ MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
+ // We'll rename this now, so we can remove it from uses.
+ uses.erase(defInst);
+ unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
+ bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
+ twoAddrUseIsUndef = false;
+
+ for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
+ MachineOperand &mo = defInst->getOperand(i);
+ if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
+ mo.setReg(newVReg);
+ if (isTwoAddr && mo.isUse() && mo.isUndef())
+ twoAddrUseIsUndef = true;
+ }
+ }
+
+ SlotIndex defIdx = lis->getInstructionIndex(defInst);
+ newVNI->def = defIdx.getDefIndex();
+
+ if (isTwoAddr && !twoAddrUseIsUndef) {
+ MachineBasicBlock *defMBB = defInst->getParent();
+ tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc);
+ MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+ copyMI->addRegisterKilled(li->reg, tri);
+ LiveRange *origUseRange =
+ li->getLiveRangeContaining(newVNI->def.getUseIndex());
+ VNInfo *origUseVNI = origUseRange->valno;
+ origUseRange->end = copyIdx.getDefIndex();
+ bool updatedKills = false;
+ for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) {
+ if (origUseVNI->kills[k] == defIdx.getDefIndex()) {
+ origUseVNI->kills[k] = copyIdx.getDefIndex();
+ updatedKills = true;
+ break;
+ }
+ }
+ assert(updatedKills && "Failed to update VNI kill list.");
+ VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
+ true, lis->getVNInfoAllocator());
+ copyVNI->addKill(defIdx.getDefIndex());
+        LiveRange copyRange(copyIdx.getDefIndex(), defIdx.getDefIndex(),
+                            copyVNI);
+ newLI->addRange(copyRange);
+ }
+ }
+
+ for (std::set<MachineInstr*>::iterator
+ usesItr = uses.begin(), usesEnd = uses.end();
+ usesItr != usesEnd; ++usesItr) {
+ MachineInstr *useInst = *usesItr;
+ SlotIndex useIdx = lis->getInstructionIndex(useInst);
+ LiveRange *useRange =
+ newLI->getLiveRangeContaining(useIdx.getUseIndex());
+
+      // If this use doesn't belong to the new interval, skip it.
+ if (useRange == 0)
+ continue;
+
+ // This use doesn't belong to the VNI, skip it.
+ if (useRange->valno != newVNI)
+ continue;
+
+ // Check if this instr is two address.
+ unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
+ bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
+
+ // Rename uses (and defs for two-address instrs).
+ for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
+ MachineOperand &mo = useInst->getOperand(i);
+ if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
+ (mo.getReg() == li->reg)) {
+ mo.setReg(newVReg);
+ }
+ }
+
+ // If this is a two address instruction we've got some extra work to do.
+ if (isTwoAddress) {
+ // We modified the def operand, so we need to copy back to the original
+ // reg.
+ MachineBasicBlock *useMBB = useInst->getParent();
+ MachineBasicBlock::iterator useItr(useInst);
+ tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc);
+ MachineInstr *copyMI = next(useItr);
+ copyMI->addRegisterKilled(newVReg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+ // Change the old two-address defined range & vni to start at
+ // (and be defined by) the copy.
+ LiveRange *origDefRange =
+ li->getLiveRangeContaining(useIdx.getDefIndex());
+ origDefRange->start = copyIdx.getDefIndex();
+ origDefRange->valno->def = copyIdx.getDefIndex();
+ origDefRange->valno->setCopy(copyMI);
+
+ // Insert a new range & vni for the two-address-to-copy value. This
+ // will be attached to the new live interval.
+ VNInfo *copyVNI =
+ newLI->getNextValue(useIdx.getDefIndex(), 0, true,
+ lis->getVNInfoAllocator());
+ copyVNI->addKill(copyIdx.getDefIndex());
+        LiveRange copyRange(useIdx.getDefIndex(), copyIdx.getDefIndex(),
+                            copyVNI);
+ newLI->addRange(copyRange);
+ }
+ }
+
+ // Iterate over any PHI kills - we'll need to insert new copies for them.
+ for (VNInfo::KillSet::iterator
+ killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
+ killItr != killEnd; ++killItr) {
+ SlotIndex killIdx(*killItr);
+ if (killItr->isPHI()) {
+ MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+ LiveRange *oldKillRange =
+ newLI->getLiveRangeContaining(killIdx);
+
+ assert(oldKillRange != 0 && "No kill range?");
+
+ tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
+ li->reg, newVReg, trc, trc);
+ MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
+ copyMI->addRegisterKilled(newVReg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+        // Save the current end. We may need it to add a new range if the
+        // current range runs off the end of the MBB.
+ SlotIndex newKillRangeEnd = oldKillRange->end;
+ oldKillRange->end = copyIdx.getDefIndex();
+
+ if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+ assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+ "PHI kill range doesn't reach kill-block end. Not sane.");
+ newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
+ newKillRangeEnd, newVNI));
+ }
+
+ *killItr = oldKillRange->end;
+ VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+ copyMI, true,
+ lis->getVNInfoAllocator());
+ newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
+ newKillVNI->setHasPHIKill(true);
+ li->addRange(LiveRange(copyIdx.getDefIndex(),
+ lis->getMBBEndIdx(killMBB),
+ newKillVNI));
+ }
+
+ }
+
+ newVNI->setHasPHIKill(false);
+
+ return newLI;
+ }
+
+};
+
+}
+
+llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ VirtRegMap *vrm) {
+ switch (spillerOpt) {
+ case trivial: return new TrivialSpiller(mf, lis, vrm); break;
+ case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
+ case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
+ default: llvm_unreachable("Unreachable!"); break;
+ }
+}
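
Stripped of the LiveInterval bookkeeping, trivialSpillEverywhere places a reload in front of every use and a store behind every def, each through a fresh vreg mapped to the same stack slot. A toy rendering of just that placement (the instruction texts and use/def flags are invented):

  #include <cstdio>

  int main() {
    struct Instr { const char *text; bool uses, defs; };
    const Instr block[] = {
      { "v1 = add v0, 4", true,  false },  // uses the spilled vreg
      { "v0 = mul v1, 2", false, true  },  // defines the spilled vreg
    };
    for (unsigned i = 0; i < sizeof(block) / sizeof(block[0]); ++i) {
      if (block[i].uses) std::printf("  vN = load [ss]    ; reload before use\n");
      std::printf("  %s\n", block[i].text);
      if (block[i].defs) std::printf("  store vN -> [ss]  ; spill after def\n");
    }
    return 0;
  }
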
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
new file mode 100644
index 0000000..dda52e8
--- /dev/null
+++ b/lib/CodeGen/Spiller.h
@@ -0,0 +1,49 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <vector>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ class LiveStacks;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineLoopInfo;
+ class SlotIndex;
+ class VirtRegMap;
+ class VNInfo;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ public:
+ virtual ~Spiller() = 0;
+
+ /// Spill the given live range. The method used will depend on the Spiller
+ /// implementation selected.
+ virtual std::vector<LiveInterval*> spill(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex = 0) = 0;
+
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
+ const MachineLoopInfo *loopInfo, VirtRegMap *vrm);
+}
+
+#endif
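
Clients see only this abstract interface plus the factory; the concrete spiller is picked by the -spiller flag handled in Spiller.cpp. The selection boils down to the pattern below, a sketch with invented stand-in classes rather than the real LLVM types:

  #include <cstdio>

  struct ToySpiller {
    virtual ~ToySpiller() {}
    virtual const char *name() const = 0;
  };
  struct ToyTrivial : ToySpiller {
    const char *name() const { return "trivial"; }
  };
  struct ToyStandard : ToySpiller {
    const char *name() const { return "standard"; }
  };

  enum ToySpillerName { toyTrivial, toyStandard };

  // Mirrors createSpiller: a switch over an enum that cl::opt would
  // normally populate from the command line.
  ToySpiller *createToySpiller(ToySpillerName opt) {
    switch (opt) {
    case toyTrivial: return new ToyTrivial();
    default:         return new ToyStandard();
    }
  }

  int main() {
    ToySpiller *s = createToySpiller(toyStandard);
    std::printf("selected spiller: %s\n", s->name());
    delete s;
    return 0;
  }
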
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 0000000..48bb5af
--- /dev/null
+++ b/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,232 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the function, the stored value is checked. If
+// it's changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+
+namespace {
+ class StackProtector : public FunctionPass {
+    /// TLI - Keep a pointer to a TargetLowering to consult for determining
+    /// target type sizes.
+ const TargetLowering *TLI;
+
+ Function *F;
+ Module *M;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector() const;
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(&ID), TLI(0) {}
+ StackProtector(const TargetLowering *tli)
+ : FunctionPass(&ID), TLI(tli) {}
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+static RegisterPass<StackProtector>
+X("stack-protector", "Insert stack protectors");
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+
+ if (!RequiresStackProtector()) return false;
+
+ return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers larger than SSPBufferSize bytes.
+bool StackProtector::RequiresStackProtector() const {
+ if (F->hasFnAttr(Attribute::StackProtectReq))
+ return true;
+
+ if (!F->hasFnAttr(Attribute::StackProtect))
+ return false;
+
+ const TargetData *TD = TLI->getTargetData();
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation())
+ // This is a call to alloca with a variable size. Emit stack
+ // protectors.
+ return true;
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ // We apparently only care about character arrays.
+ if (!AT->getElementType()->isInteger(8))
+ continue;
+
+ // If an array has more than SSPBufferSize bytes of allocated space,
+ // then we emit stack protectors.
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Constant *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ PointerType *PtrTy = PointerType::getUnqual(
+ Type::getInt8Ty(RI->getContext()));
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ &Args[0], array_endof(Args), "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
+ return FailBB;
+}
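
The prologue/epilogue code the pass emits is the familiar canary pattern; written out by hand it is roughly the following. This is a sketch only: the guard global and failure handler here are stand-ins for __stack_chk_guard and __stack_chk_fail, and a real guard value is randomized at program start rather than fixed:

  #include <cstdio>
  #include <cstdlib>
  #include <cstring>

  static unsigned long guard = 0x2f2f2f2fUL;  // stand-in for __stack_chk_guard

  static void guard_fail() {                  // stand-in for __stack_chk_fail
    std::puts("stack smashing detected");
    std::abort();
  }

  int work(const char *s) {
    volatile unsigned long slot = guard;      // prologue: stash the guard on the stack
    char buf[16];
    std::strncpy(buf, s, sizeof(buf) - 1);    // the "protected" buffer
    buf[sizeof(buf) - 1] = '\0';
    if (slot != guard)                        // epilogue: compare before returning
      guard_fail();
    return buf[0];
  }

  int main() { return work("hello") == 'h' ? 0 : 1; }
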
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..12d38f0
--- /dev/null
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,742 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+ cl::init(false), cl::Hidden,
+ cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ bool ColorWithRegs;
+ LiveStacks* LS;
+ VirtRegMap* VRM;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, a smaller
+    // index == a better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+ StackSlotColoring(bool RegColor) :
+ MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char* getPassName() const {
+ return "Stack Slot Coloring";
+ }
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ bool PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg, const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF);
+ bool AllMemRefsCanBeUnfolded(int SS);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+static RegisterPass<StackSlotColoring>
+X("stack-slot-coloring", "Stack Slot Coloring");
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+ return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot live intervals and add
+/// them to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = li.getStackSlotIndex();
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slots references with registers instead.
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg) {
+ if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+ return false;
+
+ bool Changed = false;
+ DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ if (!UsedColors[SS] || li->weight < 20)
+ // If the weight is < 20, i.e. two references in a loop with depth 1,
+ // don't bother with it.
+ continue;
+
+    // These slots may all share the same registers.
+ bool AllColored = true;
+ SmallVector<unsigned, 4> ColoredRegs;
+ for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+ int RSS = RevMap[SS][j];
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+ // If it's not colored to another stack slot, try coloring it
+ // to a "free" register.
+ if (!RC) {
+ AllColored = false;
+ continue;
+ }
+ unsigned Reg = VRM->getFirstUnusedRegister(RC);
+ if (!Reg) {
+ AllColored = false;
+ continue;
+ }
+ if (!AllMemRefsCanBeUnfolded(RSS)) {
+ AllColored = false;
+ continue;
+ } else {
+ DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
+ << TRI->getName(Reg) << '\n');
+ ColoredRegs.push_back(Reg);
+ SlotMapping[RSS] = Reg;
+ SlotIsReg.set(RSS);
+ Changed = true;
+ }
+ }
+
+ // Register and its sub-registers are no longer free.
+ while (!ColoredRegs.empty()) {
+ unsigned Reg = ColoredRegs.back();
+ ColoredRegs.pop_back();
+ VRM->setRegisterUsed(Reg);
+ // If reg is a callee-saved register, it will have to be spilled in
+ // the prologue.
+ MRI->setPhysRegUsed(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ VRM->setRegisterUsed(*AS);
+ MRI->setPhysRegUsed(*AS);
+ }
+ }
+ // This spill slot is dead after the rewrites
+ if (AllColored) {
+ MFI->RemoveStackObject(SS);
+ ++NumEliminated;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+
+ return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = li->getStackSlotIndex();
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frame index
+/// machine operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector SlotIsReg(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Can we "color" a stack slot with a unused register?
+ Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ bool isReg = SlotIsReg[SS];
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ continue;
+
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ if (!isReg)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ else {
+ // Rewrite to use a register instead.
+ unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+ SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+ UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+ }
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = RefMIs[i];
+ if (TII->isLoadFromStackSlot(MI, SS) ||
+ TII->isStoreToStackSlot(MI, SS))
+ // Restore and spill will become copies.
+ return true;
+ if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+ return false;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frame index, we can't currently unfold it.
+ return false;
+ }
+ }
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions, however all
+ // instructions using OldFI are being rewritten to use NewFI.
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->begin())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ SmallVector<MachineOperand*, 4> Refs;
+ while (--MII != MBB->begin()) {
+    bool FoundDef = false; // Not counting a two-address def.
+
+ Uses.clear();
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isImplicit())
+ return false;
+
+        // Abort if the use is actually a sub-register def. We don't have
+        // enough information to figure out if it is really legal.
+ if (MO.getSubReg() || MII->isExtractSubreg() ||
+ MII->isInsertSubreg() || MII->isSubregToReg())
+ return false;
+
+ const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+
+ if (MO.isUse()) {
+ Uses.push_back(&MO);
+ } else {
+ Refs.push_back(&MO);
+ if (!MII->isRegTiedToUseOperand(i))
+ FoundDef = true;
+ }
+ } else if (TRI->regsOverlap(Reg, NewReg)) {
+ return false;
+ } else if (TRI->regsOverlap(Reg, OldReg)) {
+ if (!MO.isUse() || !MO.isKill())
+ return false;
+ }
+ }
+
+ if (FoundDef) {
+ // Found non-two-address def. Stop here.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+ Refs[i]->setReg(NewReg);
+ return true;
+ }
+
+ // Two-address uses must be updated as well.
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Refs.push_back(Uses[i]);
+ }
+ return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->end())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ while (++MII != MBB->end()) {
+ bool FoundKill = false;
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isDef() || MO.isImplicit())
+ return false;
+
+        // Abort if the use is actually a sub-register use. We don't have
+        // enough information to figure out if it is really legal.
+ if (MO.getSubReg() || MII->isExtractSubreg())
+ return false;
+
+ const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+ if (MO.isKill())
+ FoundKill = true;
+
+ Uses.push_back(&MO);
+ } else if (TRI->regsOverlap(Reg, NewReg) ||
+ TRI->regsOverlap(Reg, OldReg))
+ return false;
+ }
+ if (FoundKill) {
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Uses[i]->setReg(NewReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg,
+ const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF) {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+ if (PropagateForward(MI, MBB, DstReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated load: ");
+ DEBUG(MI->dump());
+ ++NumLoadElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ if (!Defs.count(Reg)) {
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+ if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated store: ");
+ DEBUG(MI->dump());
+ ++NumStoreElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ // Remember reg has been defined in MBB.
+ Defs.insert(Reg);
+ } else {
+ SmallVector<MachineInstr*, 4> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+ (void)Success; // Silence unused-variable warning in release builds.
+ assert(Success && "Failed to unfold!");
+ MachineInstr *NewMI = NewMIs[0];
+ MBB->insert(MI, NewMI);
+ ++NumRegRepl;
+
+ if (NewMI->readsRegister(Reg)) {
+ if (!Defs.count(Reg))
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ }
+ MBB->erase(MI);
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
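+ // If the store also kills the loaded register, i.e. this was its last
+ // use, then the load is dead as well and can be erased.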
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Stack Slot Coloring **********\n");
+
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ LS = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots < 2) {
+ if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+ // Nothing to do!
+ return false;
+ }
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..f8f6a55
--- /dev/null
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1050 @@
+//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ struct StrongPHIElimination : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(&ID) {}
+
+ // Waiting stores, for each MBB, the set of copies that need to
+ // be inserted into that MBB
+ DenseMap<MachineBasicBlock*,
+ std::multimap<unsigned, unsigned> > Waiting;
+
+ // Stacks holds the renaming stack for each register
+ std::map<unsigned, std::vector<unsigned> > Stacks;
+
+ // Registers in UsedByAnother are PHI source registers that are
+ // themselves used as operands to another PHI node
+ std::set<unsigned> UsedByAnother;
+
+ // RenameSets is a map from a PHI-defined register to its input
+ // registers to be coalesced, along with the predecessor block for
+ // each of those input registers.
+ std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+ // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+ // eliminating, indexed by the register defined by that phi.
+ std::map<unsigned, unsigned> PhiValueNumber;
+
+ // Store the DFS-in number of each block
+ DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+ // Store the DFS-out number of each block
+ DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+
+ // TODO: Actually make this true.
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<RegisterCoalescer>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ preorder.clear();
+ maxpreorder.clear();
+
+ Waiting.clear();
+ Stacks.clear();
+ UsedByAnother.clear();
+ RenameSets.clear();
+ }
+
+ private:
+
+ /// DomForestNode - Represents a node in the "dominator forest". This is
+ /// a forest in which the nodes represent registers and the edges
+ /// represent dominance relations between the registers' defining blocks.
+ struct DomForestNode {
+ private:
+ // Store references to our children
+ std::vector<DomForestNode*> children;
+ // The register we represent
+ unsigned reg;
+
+ // Add another node as our child
+ void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+ public:
+ typedef std::vector<DomForestNode*>::iterator iterator;
+
+ // Create a DomForestNode by providing the register it represents, and
+ // the node to be its parent. The virtual root node has register 0
+ // and a null parent.
+ DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+ if (parent)
+ parent->addChild(this);
+ }
+
+ ~DomForestNode() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ }
+
+ /// getReg - Return the register that this node represents
+ inline unsigned getReg() { return reg; }
+
+ // Provide iterator access to our children
+ inline DomForestNode::iterator begin() { return children.begin(); }
+ inline DomForestNode::iterator end() { return children.end(); }
+ };
+
+ void computeDFS(MachineFunction& MF);
+ void processBlock(MachineBasicBlock* MBB);
+
+ std::vector<DomForestNode*> computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& instrs,
+ MachineRegisterInfo& MRI);
+ void processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals);
+ void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+ void InsertCopies(MachineDomTreeNode* MBB,
+ SmallPtrSet<MachineBasicBlock*, 16>& v);
+ bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+ };
+}
+
+char StrongPHIElimination::ID = 0;
+static RegisterPass<StrongPHIElimination>
+X("strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently");
+
+const PassInfo *const llvm::StrongPHIEliminationID = &X;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
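+/// A node A is an ancestor of a node B in the dominator tree exactly when
+/// preorder[A] <= preorder[B] <= maxpreorder[A]; computeDomForest relies on
+/// this interval property.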
+void StrongPHIElimination::computeDFS(MachineFunction& MF) {
+ SmallPtrSet<MachineDomTreeNode*, 8> frontier;
+ SmallPtrSet<MachineDomTreeNode*, 8> visited;
+
+ unsigned time = 0;
+
+ MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
+
+ MachineDomTreeNode* node = DT.getRootNode();
+
+ std::vector<MachineDomTreeNode*> worklist;
+ worklist.push_back(node);
+
+ while (!worklist.empty()) {
+ MachineDomTreeNode* currNode = worklist.back();
+
+ if (!frontier.count(currNode)) {
+ frontier.insert(currNode);
+ ++time;
+ preorder.insert(std::make_pair(currNode->getBlock(), time));
+ }
+
+ bool inserted = false;
+ for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
+ I != E; ++I)
+ if (!frontier.count(*I) && !visited.count(*I)) {
+ worklist.push_back(*I);
+ inserted = true;
+ break;
+ }
+
+ if (!inserted) {
+ frontier.erase(currNode);
+ visited.insert(currNode);
+ maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
+
+ worklist.pop_back();
+ }
+ }
+}
+
+namespace {
+
+/// PreorderSorter - a helper class that is used to sort registers
+/// according to the preorder number of their defining blocks
+class PreorderSorter {
+private:
+ DenseMap<MachineBasicBlock*, unsigned>& preorder;
+ MachineRegisterInfo& MRI;
+
+public:
+ PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
+ MachineRegisterInfo& M) : preorder(p), MRI(M) { }
+
+ bool operator()(unsigned A, unsigned B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
+ MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
+
+ return preorder[ABlock] < preorder[BBlock];
+ }
+};
+
+}
+
+/// computeDomForest - compute the subforest of the DomTree corresponding
+/// to the defining blocks of the registers in question
+std::vector<StrongPHIElimination::DomForestNode*>
+StrongPHIElimination::computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& regs,
+ MachineRegisterInfo& MRI) {
+ // Begin by creating a virtual root node, since the actual results
+ // may well be a forest. Assume this node has maximum DFS-out number.
+ DomForestNode* VirtualRoot = new DomForestNode(0, 0);
+ maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
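+ // The sentinel entry guarantees that the virtual root is never popped
+ // off the "current parent" stack below.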
+
+ // Populate a worklist with the registers
+ std::vector<unsigned> worklist;
+ worklist.reserve(regs.size());
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
+ E = regs.end(); I != E; ++I)
+ worklist.push_back(I->first);
+
+ // Sort the registers by the DFS-in number of their defining block
+ PreorderSorter PS(preorder, MRI);
+ std::sort(worklist.begin(), worklist.end(), PS);
+
+ // Create a "current parent" stack, and put the virtual root on top of it
+ DomForestNode* CurrentParent = VirtualRoot;
+ std::vector<DomForestNode*> stack;
+ stack.push_back(VirtualRoot);
+
+ // Iterate over all the registers in the previously computed order
+ for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
+ I != E; ++I) {
+ unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
+ MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+
+ // If the DFS-in number of the register is greater than the DFS-out number
+ // of the current parent, repeatedly pop the parent stack until it isn't.
+ while (pre > maxpreorder[parentBlock]) {
+ stack.pop_back();
+ CurrentParent = stack.back();
+
+ parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+ }
+
+ // Now that we've found the appropriate parent, create a DomForestNode for
+ // this register and attach it to the forest
+ DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+ // Push this new node on the "current parent" stack
+ stack.push_back(child);
+ CurrentParent = child;
+ }
+
+ // Return a vector containing the children of the virtual root node
+ std::vector<DomForestNode*> ret;
+ ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+ return ret;
+}
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ LiveInterval& I = LI.getOrCreateInterval(r);
+ SlotIndex idx = LI.getMBBStartIdx(MBB);
+ return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+ E = MBB->succ_end(); PI != E; ++PI)
+ if (isLiveIn(r, *PI, LI))
+ return true;
+
+ return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode', which tells it the relationship of the two
+/// registers: 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+ LiveIntervals& LV, unsigned mode) {
+ MachineInstr* def = 0;
+ MachineInstr* kill = 0;
+
+ // The code is still in SSA form at this point, so there is only one
+ // definition per VReg. Thus we can safely use MRI->getVRegDef().
+ const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+ bool interference = false;
+
+ // Walk the block, checking for interferences
+ for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr* curr = MBI;
+
+ // Same defining block...
+ if (mode == 0) {
+ if (curr == MRI->getVRegDef(a)) {
+ // If we find our first definition, save it
+ if (!def) {
+ def = curr;
+ // If there's already an unkilled definition, then
+ // this is an interference
+ } else if (!kill) {
+ interference = true;
+ break;
+ // If there's a definition followed by a KillInst, then
+ // they can't interfere
+ } else {
+ interference = false;
+ break;
+ }
+ // Symmetric with the above
+ } else if (curr == MRI->getVRegDef(b)) {
+ if (!def) {
+ def = curr;
+ } else if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ // Store KillInsts if they match up with the definition
+ } else if (curr->killsRegister(a)) {
+ if (def == MRI->getVRegDef(a)) {
+ kill = curr;
+ } else if (curr->killsRegister(b)) {
+ if (def == MRI->getVRegDef(b)) {
+ kill = curr;
+ }
+ }
+ }
+ // First properly dominates second...
+ } else if (mode == 1) {
+ if (curr == MRI->getVRegDef(b)) {
+ // Definition of second without kill of first is an interference
+ if (!kill) {
+ interference = true;
+ break;
+ // Definition after a kill is a non-interference
+ } else {
+ interference = false;
+ break;
+ }
+ // Save KillInsts of First
+ } else if (curr->killsRegister(a)) {
+ kill = curr;
+ }
+ // Symmetric with the above
+ } else if (mode == 2) {
+ if (curr == MRI->getVRegDef(a)) {
+ if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ } else if (curr->killsRegister(b)) {
+ kill = curr;
+ }
+ }
+ }
+
+ return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+ // Holds names that have been added to a set in any PHI within this block
+ // before the current one.
+ std::set<unsigned> ProcessedNames;
+
+ // Iterate over all the PHI nodes in this block
+ MachineBasicBlock::iterator P = MBB->begin();
+ while (P != MBB->end() && P->isPHI()) {
+ unsigned DestReg = P->getOperand(0).getReg();
+
+ // Don't bother doing PHI elimination for dead PHIs.
+ if (P->registerDefIsDead(DestReg)) {
+ ++P;
+ continue;
+ }
+
+ LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+ SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
+ VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+ PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+ // PHIUnion is the set of incoming registers to the PHI node that
+ // are going to be renamed rather than having copies inserted. This set
+ // is refined over the course of this function. UnionedBlocks is the set
+ // of corresponding MBBs.
+ std::map<unsigned, MachineBasicBlock*> PHIUnion;
+ SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+ // Iterate over the operands of the PHI node
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = P->getOperand(i-1).getReg();
+
+ // Don't need to try to coalesce a register with itself.
+ if (SrcReg == DestReg) {
+ ProcessedNames.insert(SrcReg);
+ continue;
+ }
+
+ // We don't need to insert copies for implicit_defs.
+ MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef())
+ ProcessedNames.insert(SrcReg);
+
+ // Check for trivial interferences via liveness information, allowing us
+ // to avoid extra work later. Any registers that interfere cannot both
+ // be in the renaming set, so choose one and add copies for it instead.
+ // The conditions are:
+ // 1) if the operand is live into the PHI node's block OR
+ // 2) if the PHI node is live out of the operand's defining block OR
+ // 3) if the operand is itself a PHI node and the original PHI is
+ // live into the operand's defining block OR
+ // 4) if the operand is already being renamed for another PHI node
+ // in this block OR
+ // 5) if any two operands are defined in the same block, insert copies
+ // for one of them
+ if (isLiveIn(SrcReg, P->getParent(), LI) ||
+ isLiveOut(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ||
+ ( MRI.getVRegDef(SrcReg)->isPHI() &&
+ isLiveIn(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
+ ProcessedNames.count(SrcReg) ||
+ UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
+
+ // Add a copy for the selected register
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+ } else {
+ // Otherwise, add it to the renaming set
+ PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
+ UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
+ }
+ }
+
+ // Compute the dominator forest for the renaming set. This is a forest
+ // where the nodes are the registers and the edges represent dominance
+ // relations between the defining blocks of the registers
+ std::vector<StrongPHIElimination::DomForestNode*> DF =
+ computeDomForest(PHIUnion, MRI);
+
+ // Walk DomForest to resolve interferences at an inter-block level. This
+ // will remove registers from the renaming set (and insert copies for them)
+ // if interferences are found.
+ std::vector<std::pair<unsigned, unsigned> > localInterferences;
+ processPHIUnion(P, PHIUnion, DF, localInterferences);
+
+ // If one of the inputs is defined in the same block as the current PHI
+ // then we need to check for a local interference between that input and
+ // the PHI.
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
+ localInterferences.push_back(std::make_pair(I->first,
+ P->getOperand(0).getReg()));
+
+ // The dominator forest walk may have returned some register pairs whose
+ // interference cannot be determined from dominator analysis. We now
+ // examine these pairs for local interferences.
+ for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
+ localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
+ std::pair<unsigned, unsigned> p = *I;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+ // Determine the block we need to scan and the relationship between
+ // the two registers
+ MachineBasicBlock* scan = 0;
+ unsigned mode = 0;
+ if (MRI.getVRegDef(p.first)->getParent() ==
+ MRI.getVRegDef(p.second)->getParent()) {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 0; // Same block
+ } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
+ MRI.getVRegDef(p.second)->getParent())) {
+ scan = MRI.getVRegDef(p.second)->getParent();
+ mode = 1; // First dominates second
+ } else {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 2; // Second dominates first
+ }
+
+ // If there's an interference, we need to insert copies
+ if (interferes(p.first, p.second, scan, LI, mode)) {
+ // Insert copies for First
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ if (P->getOperand(i-1).getReg() == p.first) {
+ unsigned SrcReg = p.first;
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg,
+ P->getOperand(0).getReg()));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+ }
+ }
+
+ // Add the renaming set for this PHI node to our overall renaming information
+ for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
+ QE = PHIUnion.end(); QI != QE; ++QI) {
+ DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
+ << P->getOperand(0).getReg() << "\n");
+ }
+
+ RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
+
+ // Remember which registers are already renamed, so that we don't try to
+ // rename them for another PHI node in this block
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ ProcessedNames.insert(I->first);
+
+ ++P;
+ }
+}
+
+/// processPHIUnion - Take a set of candidate registers to be coalesced when
+/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
+/// that are known to interfere, and flag others that need to be checked for
+/// local interferences.
+void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals) {
+
+ std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
+ SmallPtrSet<DomForestNode*, 4> visited;
+
+ // Code is still in SSA form, so we can use MRI::getVRegDef()
+ MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ unsigned DestReg = Inst->getOperand(0).getReg();
+
+ // DF walk on the DomForest
+ while (!worklist.empty()) {
+ DomForestNode* DFNode = worklist.back();
+
+ visited.insert(DFNode);
+
+ bool inserted = false;
+ for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
+ CI != CE; ++CI) {
+ DomForestNode* child = *CI;
+
+ // If the current node is live-out of the defining block of one of its
+ // children, insert a copy for it. NOTE: The paper actually calls for
+ // a more elaborate heuristic for determining whether to insert copies
+ // for the child or the parent. In the interest of simplicity, we're
+ // just always choosing the parent.
+ if (isLiveOut(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI)) {
+ // Insert copies for parent
+ for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
+ if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
+ unsigned SrcReg = DFNode->getReg();
+ MachineBasicBlock* From = Inst->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+
+ // If a node is live-in to the defining block of one of its children, but
+ // not live-out, then we need to scan that block for local interferences.
+ } else if (isLiveIn(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI) ||
+ MRI.getVRegDef(DFNode->getReg())->getParent() ==
+ MRI.getVRegDef(child->getReg())->getParent()) {
+ // Add (p, c) to possible local interferences
+ locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ }
+
+ if (!visited.count(child)) {
+ worklist.push_back(child);
+ inserted = true;
+ }
+ }
+
+ if (!inserted) worklist.pop_back();
+ }
+}
+
+/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
+/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
+/// problems.
+///
+/// Based on "Practical Improvements to the Construction and Destruction
+/// of Static Single Assignment Form" by Briggs, et al.
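+///
+/// The 'lost copy' problem arises when a copy's destination is still live
+/// out of the block where the copy is placed; the 'virtual swap' problem
+/// arises when the pending copies form a cycle. Both are handled below by
+/// introducing temporaries.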
+void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
+ std::set<unsigned>& pushed) {
+ // FIXME: This function needs to update LiveIntervals
+ std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
+
+ std::multimap<unsigned, unsigned> worklist;
+ std::map<unsigned, unsigned> map;
+
+ // Setup worklist of initial copies
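+ // A copy is ready to be scheduled only if its destination register is not
+ // itself needed as the source of some other copy, so emitting it cannot
+ // clobber a value that is still wanted.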
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; ) {
+ map.insert(std::make_pair(I->first, I->first));
+ map.insert(std::make_pair(I->second, I->second));
+
+ if (!UsedByAnother.count(I->second)) {
+ worklist.insert(*I);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+ } else {
+ ++I;
+ }
+ }
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineFunction* MF = MBB->getParent();
+ MachineRegisterInfo& MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
+
+ // Iterate over the worklist, inserting copies
+ while (!worklist.empty() || !copy_set.empty()) {
+ while (!worklist.empty()) {
+ std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
+ std::pair<unsigned, unsigned> curr = *WI;
+ worklist.erase(WI);
+
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
+
+ if (isLiveOut(curr.second, MBB, LI)) {
+ // Create a temporary
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+
+ // Insert copy from curr.second to a temporary at
+ // the Phi defining curr.second
+ MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
+ TII->copyRegToReg(*PI->getParent(), PI, t,
+ curr.second, RC, RC);
+
+ DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
+ << "\n");
+
+ // Push temporary on Stacks
+ Stacks[curr.second].push_back(t);
+
+ // Insert curr.second in pushed
+ pushed.insert(curr.second);
+
+ // Create a live interval for this temporary
+ InsertedPHIDests.push_back(std::make_pair(t, --PI));
+ }
+
+ // Insert copy from map[curr.first] to curr.second
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
+ map[curr.first], RC, RC);
+ map[curr.first] = curr.second;
+ DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
+ << curr.second << "\n");
+
+ // Push this copy onto InsertedPHICopies so we can
+ // update LiveIntervals with it.
+ MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+ // If curr.first is the destination of some pending copy, its old value
+ // has now been used, so that copy is safe to schedule; move it to the
+ // worklist.
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; )
+ if (curr.first == I->second) {
+ std::pair<unsigned, unsigned> temp = *I;
+ worklist.insert(temp);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+
+ break;
+ } else {
+ ++I;
+ }
+ }
+
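+ // The worklist is empty but copies remain: the remaining copies form a
+ // cycle (the 'virtual swap' problem). Break it by saving one destination
+ // in a fresh temporary if its value is still live out of the block.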
+ if (!copy_set.empty()) {
+ std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+ std::pair<unsigned, unsigned> curr = *CI;
+ worklist.insert(curr);
+ copy_set.erase(CI);
+
+ LiveInterval& I = LI.getInterval(curr.second);
+ MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+ SlotIndex endIdx = SlotIndex();
+ if (term != MBB->end())
+ endIdx = LI.getInstructionIndex(term);
+ else
+ endIdx = LI.getMBBEndIdx(MBB);
+
+ if (I.liveAt(endIdx)) {
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(curr.first);
+
+ // Insert a copy from dest to a new temporary t at the end of b
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
+ curr.second, RC, RC);
+ map[curr.second] = t;
+
+ MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(t, --TI));
+ }
+ }
+ }
+
+ // Renumber the instructions so that we can perform the index computations
+ // needed to create new live intervals.
+ LI.renumber();
+
+ // For copies that we inserted at the ends of predecessors, we construct
+ // live intervals. This is pretty easy, since we know that the destination
+ // register cannot have been live at that point previously. We just have
+ // to make sure that, for registers that serve as inputs to more than one
+ // PHI, we don't create multiple overlapping live intervals.
+ std::set<unsigned> RegHandled;
+ for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
+ InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
+ if (RegHandled.insert(I->first).second) {
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ SlotIndex instrIdx = LI.getInstructionIndex(I->second);
+ if (Int.liveAt(instrIdx.getDefIndex()))
+ Int.removeRange(instrIdx.getDefIndex(),
+ LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
+ true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
+ R.valno->setCopy(I->second);
+ R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
+ }
+ }
+}
+
+/// InsertCopies - insert copies into MBB and all of the blocks it dominates
+void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
+ SmallPtrSet<MachineBasicBlock*, 16>& visited) {
+ MachineBasicBlock* MBB = MDTN->getBlock();
+ visited.insert(MBB);
+
+ std::set<unsigned> pushed;
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ // Rewrite register uses from Stacks
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isPHI())
+ continue;
+
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ if (I->getOperand(i).isReg() &&
+ Stacks[I->getOperand(i).getReg()].size()) {
+ // Remove the live range for the old vreg.
+ LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
+ LiveInterval::iterator OldLR =
+ OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
+ if (OldLR != OldInt.end())
+ OldInt.removeRange(*OldLR, true);
+
+ // Change the register
+ I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
+
+ // Add a live range for the new vreg
+ LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
+ VNInfo* FirstVN = *Int.vni_begin();
+ FirstVN->setHasPHIKill(false);
+ if (I->getOperand(i).isKill())
+ FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex());
+
+ LiveRange LR (LI.getMBBStartIdx(I->getParent()),
+ LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
+ FirstVN);
+
+ Int.addRange(LR);
+ }
+ }
+
+ // Schedule the copies for this block
+ ScheduleCopies(MBB, pushed);
+
+ // Recur down the dominator tree.
+ for (MachineDomTreeNode::iterator I = MDTN->begin(),
+ E = MDTN->end(); I != E; ++I)
+ if (!visited.count((*I)->getBlock()))
+ InsertCopies(*I, visited);
+
+ // As we exit this block, pop the names we pushed while processing it
+ for (std::set<unsigned>::iterator I = pushed.begin(),
+ E = pushed.end(); I != E; ++I)
+ Stacks[*I].pop_back();
+}
+
+bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
+ unsigned secondary) {
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ LiveInterval& LHS = LI.getOrCreateInterval(primary);
+ LiveInterval& RHS = LI.getOrCreateInterval(secondary);
+
+ LI.renumber();
+
+ DenseMap<VNInfo*, VNInfo*> VNMap;
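+ // First verify that no live range of RHS overlaps LHS. If any does, the
+ // two intervals cannot be merged, so bail out before modifying anything.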
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+
+ SlotIndex Start = R.start;
+ SlotIndex End = R.end;
+ if (LHS.getLiveRangeContaining(Start))
+ return false;
+
+ if (LHS.getLiveRangeContaining(End))
+ return false;
+
+ LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
+ if (RI != LHS.end() && RI->start < End)
+ return false;
+ }
+
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+ VNInfo* OldVN = R.valno;
+ VNInfo*& NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
+ }
+
+ LiveRange LR (R.start, R.end, NewVN);
+ LHS.addRange(LR);
+ }
+
+ LI.removeInterval(RHS.reg);
+
+ return true;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+
+ // Compute DFS numbers of each block
+ computeDFS(Fn);
+
+ // Determine which phi node operands need copies
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (!I->empty() && I->begin()->isPHI())
+ processBlock(I);
+
+ // Break interferences where two different phis want to coalesce
+ // in the same register.
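+ // The first PHI to claim an operand register keeps it in its rename set;
+ // any later PHI that wants the same register gets a copy instead.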
+ std::set<unsigned> seen;
+ typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
+ RenameSetType;
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I) {
+ for (std::map<unsigned, MachineBasicBlock*>::iterator
+ OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
+ if (!seen.count(OI->first)) {
+ seen.insert(OI->first);
+ ++OI;
+ } else {
+ Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
+ unsigned reg = OI->first;
+ ++OI;
+ I->second.erase(reg);
+ DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
+ << "\n");
+ }
+ }
+ }
+
+ // Insert copies
+ // FIXME: This process should probably preserve LiveIntervals
+ SmallPtrSet<MachineBasicBlock*, 16> visited;
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ InsertCopies(MDT.getRootNode(), visited);
+
+ // Perform renaming
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I)
+ while (I->second.size()) {
+ std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
+
+ DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
+
+ if (SI->first != I->first) {
+ if (mergeLiveIntervals(I->first, SI->first)) {
+ Fn.getRegInfo().replaceRegWith(SI->first, I->first);
+
+ if (RenameSets.count(SI->first)) {
+ I->second.insert(RenameSets[SI->first].begin(),
+ RenameSets[SI->first].end());
+ RenameSets.erase(SI->first);
+ }
+ } else {
+ // Insert a last-minute copy if a conflict was detected.
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
+ TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
+ I->first, SI->first, RC, RC);
+
+ LI.renumber();
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ SlotIndex instrIdx =
+ LI.getInstructionIndex(--SI->second->getFirstTerminator());
+ if (Int.liveAt(instrIdx.getDefIndex()))
+ Int.removeRange(instrIdx.getDefIndex(),
+ LI.getMBBEndIdx(SI->second).getNextSlot(), true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
+ --SI->second->getFirstTerminator());
+ R.valno->setCopy(--SI->second->getFirstTerminator());
+ R.valno->def = instrIdx.getDefIndex();
+
+ DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
+ << I->first << "\n");
+ }
+ }
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ const LiveRange* LR =
+ Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
+ LR->valno->setHasPHIKill(true);
+
+ I->second.erase(SI->first);
+ }
+
+ // Remove PHIs
+ std::vector<MachineInstr*> phis;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
+ BI != BE; ++BI)
+ if (BI->isPHI())
+ phis.push_back(BI);
+ }
+
+ for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
+ I != E; ) {
+ MachineInstr* PInstr = *(I++);
+
+ // If this is a dead PHI node, then remove it from LiveIntervals.
+ unsigned DestReg = PInstr->getOperand(0).getReg();
+ LiveInterval& PI = LI.getInterval(DestReg);
+ if (PInstr->registerDefIsDead(DestReg)) {
+ if (PI.containsOneValue()) {
+ LI.removeInterval(DestReg);
+ } else {
+ SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
+ PI.removeRange(*PI.getLiveRangeContaining(idx), true);
+ }
+ } else {
+ // Trim live intervals of input registers. An input is no longer live
+ // into this block if its range expired at the PHI, i.e. the PHI was its
+ // last use. If it remains live past the PHI, don't trim it, because it
+ // might have other legitimate uses.
+ for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
+ unsigned reg = PInstr->getOperand(i).getReg();
+
+ MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
+ LiveInterval& InputI = LI.getInterval(reg);
+ if (MBB != PInstr->getParent() &&
+ InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
+ InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
+ InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
+ LI.getInstructionIndex(PInstr),
+ true);
+ }
+
+ // If the PHI is not dead, then the valno defined by the PHI
+ // now has an unknown def.
+ SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
+ const LiveRange* PLR = PI.getLiveRangeContaining(idx);
+ PLR->valno->setIsPHIDef(true);
+ LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
+ PLR->start, PLR->valno);
+ PI.addRange(R);
+ }
+
+ LI.RemoveMachineInstrFromMaps(PInstr);
+ PInstr->eraseFromParent();
+ }
+
+ LI.renumber();
+
+ return true;
+}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 0000000..9ab4058
--- /dev/null
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,646 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTails , "Number of tails duplicated");
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ bool PreRegAlloc;
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass(bool PreRA) :
+ MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Tail Duplication"; }
+
+ private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
+ return new TailDuplicatePass(PreRegAlloc);
+}
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChange = false;
+ while (TailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ // This is not a hard error.
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ // Only duplicate blocks that end with unconditional branches.
+ if (MBB->canFallThrough())
+ continue;
+
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
+ ++NumTails;
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->getParent() == DefBB)
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
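+/// isDefLiveOut - Return true if Reg has a use outside BB, i.e. the value
+/// defined in BB is needed in some other block. This is a conservative
+/// check based on the use list rather than on liveness analysis.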
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
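+/// getPHISrcRegOpIdx - Return the index of the source register operand of
+/// PHI MI that corresponds to SrcBB, or 0 if there is none. Operand 0 is
+/// the PHI def, so 0 can safely serve as the "not found" value.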
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register contributed by PredBB and update the
+/// SSA-update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Record a copy from the PHI source into a new virtual register; that new
+ // register becomes the available value live out of PredBB.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
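+ // The copy instruction itself is materialized later by the caller, just
+ // before PredBB's first terminator.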
+ if (isDefLiveOut(DefReg, TailBB, MRI))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap) {
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end())
+ MO.setReg(VI->second);
+ }
+ }
+ PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ II->addOperand(MachineOperand::CreateReg(Reg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ }
+ }
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ // Set the limit on the number of instructions to duplicate, with a default
+ // of one less than the tail-merge threshold. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ if (PreRegAlloc) {
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches.
+ if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
+ return false;
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+ MaxDuplicateCount = 20;
+ }
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ bool HasCall = false;
+ for (MachineBasicBlock::iterator I = TailBB->begin();
+ I != TailBB->end(); ++I) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable()) return false;
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->getDesc().isReturn()) return false;
+ // Don't duplicate more than the threshold.
+ if (InstrCount == MaxDuplicateCount) return false;
+ // Remember if we saw a call.
+ if (I->getDesc().isCall()) HasCall = true;
+ if (!I->isPHI())
+ InstrCount += 1;
+ }
+ // Heuristically, don't tail-duplicate calls if it would expand code size,
+ // as it's less likely to be worth the extra cost.
+ if (InstrCount > 1 && HasCall)
+ return false;
+
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ if (PredBB->succ_size() > 1) continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ while (I != TailBB->end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+ TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
+ CopyInfos[i].second, RC,RC);
+ MachineInstr *CopyMI = prior(Loc);
+ Copies.push_back(CopyMI);
+ }
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+ TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+ TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
+ CopyInfos[i].second, RC, RC);
+ MachineInstr *CopyMI = prior(Loc);
+ Copies.push_back(CopyMI);
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
+
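As a standalone illustration of the rewiring TailDuplicate performs, here is a
minimal sketch on a toy CFG: the tail block's instructions are copied into each
predecessor whose only successor is the tail, and that predecessor is then
wired directly to the tail's successors. The Block type and helper below are
invented for illustration and are not LLVM API.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<std::string> Insns;       // straight-line "instructions"
  std::vector<Block*> Succs, Preds;
};

// Mirror of the PredBB loop above: duplicate Tail into every predecessor that
// unconditionally reaches it, then retarget that predecessor's edges.
static void tailDuplicate(Block &Tail) {
  std::vector<Block*> Preds(Tail.Preds); // copy; we edit Tail.Preds below
  for (Block *Pred : Preds) {
    if (Pred->Succs.size() != 1 || Pred->Succs[0] != &Tail)
      continue;                          // analogous to the succ_size() checks
    Pred->Insns.insert(Pred->Insns.end(), Tail.Insns.begin(), Tail.Insns.end());
    Pred->Succs = Tail.Succs;            // the PredBB->addSuccessor(*I) loop
    for (Block *S : Tail.Succs)
      S->Preds.push_back(Pred);
    Tail.Preds.erase(std::remove(Tail.Preds.begin(), Tail.Preds.end(), Pred),
                     Tail.Preds.end());
  }
}

int main() {
  Block A{"A", {"a1"}, {}, {}}, B{"B", {"b1"}, {}, {}};
  Block T{"T", {"t1", "ret"}, {}, {}};
  A.Succs = {&T}; B.Succs = {&T}; T.Preds = {&A, &B};
  tailDuplicate(T);
  for (Block *P : {&A, &B}) {            // prints "A: a1 t1 ret", "B: b1 t1 ret"
    std::cout << P->Name << ":";
    for (const std::string &S : P->Insns) std::cout << ' ' << S;
    std::cout << '\n';
  }
}
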
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 0000000..a0fccab
--- /dev/null
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,352 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which provides default
+// implementations of various TargetInstrInfo methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ bool HasDef = TID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ llvm_report_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ bool ChangeReg0 = false;
+ if (HasDef && MI->getOperand(0).getReg() == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = HasDef
+ ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+ bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ if (HasDef)
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+ else
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
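+  // In-place commute: swap the two register operands, carrying each kill flag
+  // along with its register (e.g. "r0 = add r1<kill>, r2" becomes
+  // "r0 = add r2, r1<kill>"). A tied def that named Reg1 is rewritten too.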
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// findCommutedOpIndices - If the specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = TID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MachineOperand &MO = MI->getOperand(0);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ MO.setReg(DestReg);
+ MO.setSubReg(SubIdx);
+ } else if (SubIdx) {
+ MO.setReg(TRI->getSubReg(DestReg, SubIdx));
+ } else {
+ MO.setReg(DestReg);
+ }
+ MBB.insert(I, MI);
+}
+
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->getDesc().isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+bool
+TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const {
+ if (MI->getOpcode() != Other->getOpcode() ||
+ MI->getNumOperands() != Other->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(OMO.isReg() && OMO.isDef());
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Reg != OMO.getReg())
+ return false;
+ } else if (MRI->getRegClass(MO.getReg()) !=
+ MRI->getRegClass(OMO.getReg()))
+ return false;
+
+ continue;
+ }
+
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+
+ return true;
+}
+
+unsigned
+TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ if (!NewMI) return 0;
+
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FrameIndex) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
+ Flags, /*Offset=*/0,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+
+ return NewMI;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool
+TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
+ MI,
+ AliasAnalysis *
+ AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+
+ // Avoid instructions obviously unsafe for remat.
+ if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
+ TID.mayStore())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.def_empty(Reg))
+ return false;
+ BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+ if (AllocatableRegs.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI.def_empty(AliasReg))
+ return false;
+ if (AllocatableRegs.test(AliasReg))
+ return false;
+ }
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // For the def, it should be the only def of that register.
+ if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
+ MRI.isLiveIn(Reg)))
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+    // virtual register uses would lengthen the live ranges of the uses, which
+    // is not necessarily a good idea and certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
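The rematerialization legality checks above reduce to a simple predicate: an
instruction may be re-executed at an arbitrary later point only if doing so is
guaranteed to produce the same value. A self-contained sketch of that shape
follows; the Insn and Operand types are invented here, and physical-register
uses are rejected outright rather than getting the ambient/allocatable
analysis the real code performs.

#include <cassert>
#include <cstddef>
#include <vector>

struct Operand { bool IsReg, IsDef, IsPhysical; };
struct Insn {
  bool MayStore, MayLoadVarying, HasSideEffects;
  std::vector<Operand> Ops;
};

static bool isTriviallyRematerializable(const Insn &I) {
  // No stores, no loads of possibly-changing memory, no side effects.
  if (I.MayStore || I.MayLoadVarying || I.HasSideEffects)
    return false;
  for (std::size_t i = 0; i < I.Ops.size(); ++i) {
    const Operand &Op = I.Ops[i];
    if (!Op.IsReg) continue;
    if (Op.IsPhysical) return false;        // conservative, unlike the real code
    if (Op.IsDef != (i == 0)) return false; // one virtual def, in operand 0
    if (!Op.IsDef) return false;            // no virtual-register uses at all
  }
  return true;
}

int main() {
  Insn LoadImm{false, false, false, {{true, true, false}}};  // v1 = movi 42
  Insn AddUse{false, false, false,
              {{true, true, false}, {true, false, false}}};  // v2 = add v1, 1
  assert(isTriviallyRematerializable(LoadImm));
  assert(!isTriviallyRematerializable(AddUse)); // reads v1; remat would extend it
  return 0;
}
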
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..6c7c1a1
--- /dev/null
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1108 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to
+// be capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class TwoAddressInstructionPass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ AliasAnalysis *AA;
+
+    // DistanceMap - Keep track of the distance of each MI from the start of
+    // the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies from physical
+ // registers to virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies to physical
+ // registers from virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist);
+
+ typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
+ bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist);
+ bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi, unsigned Dist);
+
+ bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+static RegisterPass<TwoAddressInstructionPass>
+X("twoaddressinstruction", "Two-Address instruction pass");
+
+const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, SeenStore, AA))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+ // No other uses in the same block, remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
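+  // There is "a use after the last def" only if some use of Reg falls strictly
+  // between the last def and the current instruction at distance Dist.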
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or an
+/// extract_subreg, insert_subreg, or subreg_to_reg instruction. It also
+/// returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ unsigned SrcSubIdx, DstSubIdx;
+ if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (MI.isExtractSubreg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else if (MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ }
+ }
+
+ if (DstReg) {
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+ }
+ return false;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (llvm::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
+/// use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
+ if (UI == MRI->use_end())
+ return 0;
+ MachineInstr &UseMI = *UI;
+ if (++UI != MRI->use_end())
+ // More than one use.
+ return 0;
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
+ (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+ // Since there are no intervening uses for both registers, then commute
+ // if the def of regC is closer. Its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ // If the instruction changed to commute it, update livevar.
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov r1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+      while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI, TII,
+ IsCopy, NewReg, IsDstPhys)) {
+ if (IsCopy) {
+ if (!Processed.insert(UseMI))
+ break;
+ }
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+        // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == DstReg &&
+ "Can't map to two src physical registers!");
+ VirtRegPairs.push_back(NewReg);
+ DstReg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&
+ "Can't map to two dst physical registers!");
+ ToReg = FromReg;
+ }
+ }
+ }
+
+ Processed.insert(MI);
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI,
+ const TargetInstrInfo *TII,
+ SmallVector<unsigned, 4> &Kills) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayStore() || TID.isCall())
+ return false;
+ if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ Kills.push_back(MO.getReg());
+ }
+ return true;
+}
+
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist. If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass::
+canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist) {
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill))
+ return false;
+
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
+ if (!LastKill)
+ return false;
+
+ bool isModRef = LastKill->modifiesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+ LastKill));
+ }
+ return true;
+}
+
+/// DeleteUnusedInstr - If an instruction with a tied register operand can
+/// be safely deleted, just delete it.
+bool
+TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned Dist) {
+ // Check if the instruction has no side effects and if all its defs are dead.
+ SmallVector<unsigned, 4> Kills;
+ if (!isSafeToDelete(mi, TII, Kills))
+ return false;
+
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ SmallVector<NewKill, 4> NewKills;
+ if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
+ return false;
+
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+ }
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ return true;
+}
+
+/// TryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if the tied operands
+/// are eliminated altogether.
+bool TwoAddressInstructionPass::
+TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ const TargetInstrDesc &TID = mi->getDesc();
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ bool regBKilled = isKilled(*mi, regB, MRI, TII);
+ if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
+ ++NumDeletes;
+ return true; // Done with this instruction.
+ }
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = mi->getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (TID.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA)) {
+ // Try to convert it.
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+ return false;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n");
+ const TargetMachine &TM = MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+
+ // ReMatRegs - Keep track of the registers whose def's are remat'ed.
+ BitVector ReMatRegs;
+ ReMatRegs.resize(MRI->getLastVirtReg()+1);
+
+ typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
+ TiedOperandMap;
+ TiedOperandMap TiedOperands(4);
+
+ SmallPtrSet<MachineInstr*, 8> Processed;
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ const TargetInstrDesc &TID = mi->getDesc();
+ bool FirstTied = true;
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ ProcessCopy(&*mi, &*mbbi, Processed);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ unsigned NumOps = mi->isInlineAsm()
+ ? mi->getNumOperands() : TID.getNumOperands();
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+
+ if (FirstTied) {
+ FirstTied = false;
+ ++NumTwoAddressInstrs;
+ DEBUG(dbgs() << '\t' << *mi);
+ }
+
+ assert(mi->getOperand(SrcIdx).isReg() &&
+ mi->getOperand(SrcIdx).getReg() &&
+ mi->getOperand(SrcIdx).isUse() &&
+ "two address instruction invalid");
+
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+ TiedOperandMap::iterator OI = TiedOperands.find(regB);
+ if (OI == TiedOperands.end()) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
+ OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
+ }
+ OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+
+ // If the registers are already equal, nothing needs to be done.
+ if (mi->getOperand(SrcIdx).getReg() ==
+ mi->getOperand(DstIdx).getReg())
+ break; // Done with this instruction.
+
+ if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
+ break; // The tied operands have been eliminated.
+ }
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned regB = OI->first;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ // Grab regB from the instruction because it may have changed if the
+ // instruction was commuted.
+ regB = mi->getOperand(SrcIdx).getReg();
+
+ if (regA == regB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = regA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !mi->getOperand(i).isReg() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // Emit a copy or rematerialize the definition.
+ const TargetRegisterClass *rc = MRI->getRegClass(regB);
+ MachineInstr *DefMI = MRI->getVRegDef(regB);
+ // If it's safe and profitable, remat the definition instead of
+ // copying it.
+ if (DefMI &&
+ DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isSafeToReMat(TII, regB, AA) &&
+ isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
+ unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
+ ReMatRegs.set(regB);
+ ++NumReMats;
+ } else {
+ bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+ (void)Emitted;
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ }
+
+ MachineBasicBlock::iterator prevMI = prior(mi);
+ // Update DistanceMap.
+ DistanceMap.insert(std::make_pair(prevMI, Dist));
+ DistanceMap[mi] = ++Dist;
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
+
+ MachineOperand &MO = mi->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(regA);
+ }
+
+ if (AllUsesCopied) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
+ LV->addVirtualRegisterKilled(regB, prior(mi));
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ // Some remat'ed instructions are dead.
+ int VReg = ReMatRegs.find_first();
+ while (VReg != -1) {
+ if (MRI->use_empty(VReg)) {
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ DefMI->eraseFromParent();
+ }
+ VReg = ReMatRegs.find_next(VReg);
+ }
+
+ return MadeChange;
+}
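The pass's core rewrite (A = B op C into A = B; A op= C) can be shown in a
self-contained toy. The Insn struct and makeTwoAddress helper are invented for
illustration; the real pass additionally tries deleting, commuting,
three-address conversion, and rematerialization before falling back to this
copy.

#include <cassert>
#include <string>
#include <vector>

struct Insn { std::string Op, A, B, C; };  // A = B <Op> C

static std::vector<Insn> makeTwoAddress(const Insn &I) {
  if (I.A == I.B)                          // already two-address; nothing to do
    return {I};
  return {{"copy", I.A, I.B, ""},          // A = B
          {I.Op, I.A, I.A, I.C}};          // A op= C (dst tied to first src)
}

int main() {
  std::vector<Insn> Out = makeTwoAddress({"add", "a", "b", "c"});
  assert(Out.size() == 2);
  assert(Out[0].Op == "copy" && Out[0].A == "a" && Out[0].B == "b");
  assert(Out[1].Op == "add" && Out[1].A == "a" && Out[1].B == "a" &&
         Out[1].C == "c");
  return 0;
}
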
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..b0f0a07
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,218 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ProfileInfo>();
+ }
+ };
+}
+char UnreachableBlockElim::ID = 0;
+static RegisterPass<UnreachableBlockElim>
+X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+static RegisterPass<UnreachableMachineBlockElim>
+Y("unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks");
+
+const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
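+ // PHI operands are the def followed by (value, predecessor-MBB) pairs;
+ // walk the pairs from the end and remove any that name the dead block.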
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = DeadBlocks[i];
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->end(); I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+ MBB->eraseFromParent();
+ }
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ }
+
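+ // A PHI left with a single incoming value (def plus one value/block
+ // pair, i.e. exactly 3 operands) is redundant: erase it and merge its
+ // input register into its output register.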
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+
+ if (Input != Output)
+ F.getRegInfo().replaceRegWith(Output, Input);
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return DeadBlocks.size();
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..5956b61
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,282 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills , "Number of register spills");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+static RegisterPass<VirtRegMap>
+X("virtregmap", "Virtual Register Map");
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ ReMatId = MAX_STACK_SLOT+1;
+ LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2ReMatIdMap.clear();
+ Virt2SplitMap.clear();
+ Virt2SplitKillMap.clear();
+ ReMatMap.clear();
+ ImplicitDefed.clear();
+ SpillSlotToUsesMap.clear();
+ MI2VirtMap.clear();
+ SpillPt2VirtMap.clear();
+ RestorePt2VirtMap.clear();
+ EmergencySpillMap.clear();
+ EmergencySpillSlots.clear();
+
+ SpillSlotToUsesMap.resize(8);
+ ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
+ TargetRegisterInfo::FirstVirtualRegister);
+
+ allocatableRCRegs.clear();
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ allocatableRCRegs.insert(std::make_pair(*I,
+ TRI->getAllocatableSet(mf, *I)));
+
+ grow();
+
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ Virt2PhysMap.grow(LastVirtReg);
+ Virt2StackSlotMap.grow(LastVirtReg);
+ Virt2ReMatIdMap.grow(LastVirtReg);
+ Virt2SplitMap.grow(LastVirtReg);
+ Virt2SplitKillMap.grow(LastVirtReg);
+ ReMatMap.grow(LastVirtReg);
+ ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+}
+
+unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
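+ // Hint.first is the hint type (0 means a plain register preference);
+ // Hint.second may itself be a virtual register, in which case it must be
+ // mapped through to its assigned physical register before use.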
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ physReg = getPhys(physReg);
+ if (Hint.first == 0)
+ return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ ? physReg : 0;
+ return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ Virt2StackSlotMap[virtReg] = SS;
+ ++NumSpills;
+ return SS;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+ std::map<const TargetRegisterClass*, int>::iterator I =
+ EmergencySpillSlots.find(RC);
+ if (I != EmergencySpillSlots.end())
+ return I->second;
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ EmergencySpillSlots[RC] = SS;
+ return SS;
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+ if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+ // If FI < LowSpillSlot, this stack reference was produced by
+ // instruction selection and is not a spill
+ if (FI >= LowSpillSlot) {
+ assert(FI >= 0 && "Spill slot index should not be negative!");
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+ }
+ }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ MachineInstr *NewMI, ModRef MRInfo) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+ MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+ continue;
+ // This stack reference was produced by instruction selection and
+ // is not a spill
+ if (FI < LowSpillSlot)
+ continue;
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+ }
+ MI2VirtMap.erase(MI);
+ SpillPt2VirtMap.erase(MI);
+ RestorePt2VirtMap.erase(MI);
+ EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
+ unsigned NumRegs = TRI->getNumRegs();
+ UnusedRegs.reset();
+ UnusedRegs.resize(NumRegs);
+
+ BitVector Used(NumRegs);
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[i]);
+
+ BitVector Allocatable = TRI->getAllocatableSet(*MF);
+ bool AnyUnused = false;
+ for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
+ if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
+ bool ReallyUnused = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ if (Used[*AS] || LIs->hasInterval(*AS)) {
+ ReallyUnused = false;
+ break;
+ }
+ }
+ if (ReallyUnused) {
+ AnyUnused = true;
+ UnusedRegs.set(Reg);
+ }
+ }
+ }
+
+ return AnyUnused;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
+ << "]\n";
+ }
+
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..a5599f6
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,495 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
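+//
+// A sketch of typical use (names are from this header; the surrounding
+// allocator logic is hypothetical):
+//
+//   VirtRegMap &VRM = ...;               // provided by the pass manager
+//   if (PhysReg)                         // allocator found a register
+//     VRM.assignVirt2Phys(VirtReg, PhysReg);
+//   else                                 // no register: spill to a slot
+//     VRM.assignVirt2StackSlot(VirtReg);
+//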
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+ class raw_ostream;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
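+ // Stack slot indexes occupy [0, MAX_STACK_SLOT]; rematerialization ids are
+ // handed out starting at MAX_STACK_SLOT+1, so the two ranges never collide
+ // (see ReMatId below).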
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+ /// mapping. Each spilled virtual register that should be remat'd has an
+ /// entry in it which corresponds to the remat id.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+ /// Virt2SplitMap - This is a virtual register to split virtual register
+ /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+ /// Virt2SplitKillMap - This maps a split virtual register to its last use
+ /// (kill) index.
+ IndexedMap<SlotIndex> Virt2SplitKillMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+ /// MI2VirtMap - This is MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
+
+ /// SpillPt2VirtMap - This records the virtual registers which should
+ /// be spilled right after the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+ SpillPt2VirtMap;
+
+ /// RestorePt2VirtMap - This records the virtual registers which should
+ /// be restored right before the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+ /// EmergencySpillMap - This records the physical registers that should
+ /// be spilled / restored around the MachineInstr since the register
+ /// allocator has run out of registers.
+ std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+ /// EmergencySpillSlots - This records emergency spill slots used to
+ /// spill physical registers when the register allocator runs out of
+ /// registers. Ideally only one stack slot is used per function per
+ /// register class.
+ std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+ /// ReMatId - Instead of assigning a stack slot to a to-be-rematerialized
+ /// virtual register, a unique id is assigned. This keeps track of
+ /// the highest id used so far. Note, this starts at (1<<18) to avoid
+ /// conflicts with stack slot numbers.
+ int ReMatId;
+
+ /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+ int LowSpillSlot, HighSpillSlot;
+
+ /// SpillSlotToUsesMap - Records uses for each register spill slot.
+ SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+ /// ImplicitDefed - One bit for each virtual register. If set it indicates
+ /// the register is implicitly defined.
+ BitVector ImplicitDefed;
+
+ /// UnusedRegs - A list of physical registers that have not been used.
+ BitVector UnusedRegs;
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT),
+ Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+ Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
+ ReMatId(MAX_STACK_SLOT+1),
+ LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+ /// @brief clears the specified virtual register's physical
+ /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief returns the register allocation preference.
+ unsigned getRegAllocPref(unsigned virtReg);
+
+ /// @brief records virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot and is not rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+ getReMatId(virtReg) == NO_STACK_SLOT)
+ return true;
+ // Split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief returns the rematerialization id mapped to the specified virtual
+ /// register
+ int getReMatId(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2ReMatIdMap[virtReg];
+ }
+
+ /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ void assignVirtReMatId(unsigned virtReg, int id);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap[virtReg] != NULL;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+ return ReMatMap[virtReg];
+ }
+
+ /// @brief records that the specified virtual register will be
+ /// re-materialized and the original instruction which will be re-issued
+ /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+ /// @brief record the last use (kill) of a split virtual register.
+ void addKillPoint(unsigned virtReg, SlotIndex index) {
+ Virt2SplitKillMap[virtReg] = index;
+ }
+
+ SlotIndex getKillPoint(unsigned virtReg) const {
+ return Virt2SplitKillMap[virtReg];
+ }
+
+ /// @brief remove the last use (kill) of a split virtual register.
+ void removeKillPoint(unsigned virtReg) {
+ Virt2SplitKillMap[virtReg] = SlotIndex();
+ }
+
+ /// @brief returns true if the specified MachineInstr is a spill point.
+ bool isSpillPt(MachineInstr *Pt) const {
+ return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be spilled due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+ return SpillPt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a spill point for virtReg.
+ void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Pt);
+ if (I != SpillPt2VirtMap.end())
+ I->second.push_back(std::make_pair(virtReg, isKill));
+ else {
+ std::vector<std::pair<unsigned,bool> > Virts;
+ Virts.push_back(std::make_pair(virtReg, isKill));
+ SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer spill point information from one instruction to
+ /// another.
+ void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Old);
+ if (I == SpillPt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back().first;
+ bool isKill = I->second.back().second;
+ I->second.pop_back();
+ addSpillPoint(virtReg, isKill, New);
+ }
+ SpillPt2VirtMap.erase(I);
+ }
+
+ /// @brief returns true if the specified MachineInstr is a restore point.
+ bool isRestorePt(MachineInstr *Pt) const {
+ return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be restored due to
+ /// splitting right before the specified MachineInstr.
+ std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+ return RestorePt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a restore point for virtReg.
+ void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Pt);
+ if (I != RestorePt2VirtMap.end())
+ I->second.push_back(virtReg);
+ else {
+ std::vector<unsigned> Virts;
+ Virts.push_back(virtReg);
+ RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer restore point information from one instruction to
+ /// another.
+ void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Old);
+ if (I == RestorePt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addRestorePoint(virtReg, New);
+ }
+ RestorePt2VirtMap.erase(I);
+ }
+
+ /// @brief records that the specified physical register must be spilled
+ /// around the specified machine instr.
+ void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+ if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+ EmergencySpillMap[MI].push_back(PhysReg);
+ else {
+ std::vector<unsigned> PhysRegs;
+ PhysRegs.push_back(PhysReg);
+ EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+ }
+ }
+
+ /// @brief returns true if one or more physical registers must be spilled
+ /// around the specified instruction.
+ bool hasEmergencySpills(MachineInstr *MI) const {
+ return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+ }
+
+ /// @brief returns the physical registers to be spilled and restored around
+ /// the instruction.
+ std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+ return EmergencySpillMap[MI];
+ }
+
+ /// @brief - transfer emergency spill information from one instruction to
+ /// another.
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+ EmergencySpillMap.find(Old);
+ if (I == EmergencySpillMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addEmergencySpill(virtReg, New);
+ }
+ EmergencySpillMap.erase(I);
+ }
+
+ /// @brief returns or creates an emergency spill slot for the register class.
+ int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+ /// @brief Return lowest spill slot index.
+ int getLowSpillSlot() const {
+ return LowSpillSlot;
+ }
+
+ /// @brief Return highest spill slot index.
+ int getHighSpillSlot() const {
+ return HighSpillSlot;
+ }
+
+ /// @brief Records a spill slot use.
+ void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+ /// @brief Returns true if spill slot has been used.
+ bool isSpillSlotUsed(int FrameIndex) const {
+ assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+ return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+ }
+
+ /// @brief Mark the specified register as being implicitly defined.
+ void setIsImplicitlyDefined(unsigned VirtReg) {
+ ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ }
+
+ /// @brief Returns true if the virtual register is implicitly defined.
+ bool isImplicitlyDefined(unsigned VirtReg) const {
+ return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into newMI machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+ ModRef MRInfo);
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into the specified machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+ /// @brief returns the virtual registers' values folded in memory
+ /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
+
+ /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+ /// folded instruction map and spill point map.
+ void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+ /// FindUnusedRegisters - Gather a list of allocatable registers that
+ /// have not been allocated to any virtual register.
+ bool FindUnusedRegisters(LiveIntervals* LIs);
+
+ /// HasUnusedRegisters - Return true if there are any allocatable registers
+ /// that have not been allocated to any virtual register.
+ bool HasUnusedRegisters() const {
+ return !UnusedRegs.none();
+ }
+
+ /// setRegisterUsed - Remember the physical register is now used.
+ void setRegisterUsed(unsigned Reg) {
+ UnusedRegs.reset(Reg);
+ }
+
+ /// isRegisterUnused - Return true if the physical register has not been
+ /// used.
+ bool isRegisterUnused(unsigned Reg) const {
+ return UnusedRegs[Reg];
+ }
+
+ /// getFirstUnusedRegister - Return the first physical register that has not
+ /// been used.
+ unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+ int Reg = UnusedRegs.find_first();
+ while (Reg != -1) {
+ if (allocatableRCRegs[RC][Reg])
+ return (unsigned)Reg;
+ Reg = UnusedRegs.find_next(Reg);
+ }
+ return 0;
+ }
+
+ void print(raw_ostream &OS, const Module* M = 0) const;
+ void dump() const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 0000000..ce62594
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2453 @@
+//===-- llvm/CodeGen/VirtRegRewriter.cpp - Virtual Register Rewriter -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDSS , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM , "Number of re-materializable defs elided");
+STATISTIC(NumStores , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats , "Number of re-materializations");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused , "Number of values reused");
+STATISTIC(NumDCE , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+ enum RewriterName { local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+ cl::desc("Rewriter to use: (default: local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(local, "local rewriter"),
+ clEnumVal(trivial, "trivial rewriter"),
+ clEnumValEnd),
+ cl::init(local));
+
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+ cl::desc("Schedule spill code"),
+ cl::init(false));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
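+/// For example (hypothetical registers and index names): a use of
+/// %reg1024:ssub_0 with %reg1024 assigned D1 is rewritten to use S2, the
+/// ssub_0 sub-register of D1, and the subreg index is cleared.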
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ if (unsigned SubIdx = MO.getSubReg()) {
+ // Insert the physical subreg and reset the subreg field.
+ MO.setReg(TRI.getSubReg(Reg, SubIdx));
+ MO.setSubReg(0);
+
+ // Any def, dead, and kill flags apply to the full virtual register, so they
+ // also apply to the full physical register. Add imp-def/dead and imp-kill
+ // as needed.
+ MachineInstr &MI = *MO.getParent();
+ if (MO.isDef())
+ if (MO.isDead())
+ MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
+ else
+ MI.addRegisterDefined(Reg, &TRI);
+ else if (!MO.isUndef() &&
+ (MO.isKill() ||
+ MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+ } else {
+ MO.setReg(Reg);
+ }
+}
+
+namespace {
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct TrivialRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ DEBUG(dbgs() << "**** Machine Instrs"
+ << "(NOTE! Does not include spills and reloads!) ****\n");
+ DEBUG(MF.dump());
+
+ MachineRegisterInfo *mri = &MF.getRegInfo();
+ const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
+
+ bool changed = false;
+
+ for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = li->reg;
+
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ if (!li->empty())
+ mri->setPhysRegUsed(reg);
+ }
+ else {
+ if (!VRM.hasPhys(reg))
+ continue;
+ unsigned pReg = VRM.getPhys(reg);
+ mri->setPhysRegUsed(pReg);
+ for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg),
+ regEnd = mri->reg_end(); regItr != regEnd;) {
+ MachineOperand &mop = regItr.getOperand();
+ assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?");
+ ++regItr;
+ substitutePhysReg(mop, pReg, *tri);
+ changed = true;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.dump());
+
+ return changed;
+ }
+
+};
+
+}
+
+// ************************************************************************ //
+
+namespace {
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register-allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+/// this bit, and addAvailable sets it when the value may be clobbered.
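+///
+/// A sketch of the encoding: if the value of stack slot #4 is available in
+/// physreg R3 (a hypothetical register) and may be clobbered, then
+/// SpillSlotsOrReMatsAvailable[4] == (R3 << 1) | 1.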
+class AvailableSpills {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+ // or remat'ed virtual register values that are still available, due to
+ // being loaded or stored to, but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+ // indicating which stack slot values are currently held by a physreg. This
+ // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+ // physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+ : TRI(tri), TII(tii) {
+ }
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
+
+ const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+ /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+ /// available in a physical register, return that PhysReg, otherwise
+ /// return 0.
+ unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I =
+ SpillSlotsOrReMatsAvailable.find(Slot);
+ if (I != SpillSlotsOrReMatsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot / remat is available
+ /// in the specified physreg. If CanClobber is true, the physreg can be
+ /// modified at any time without changing the semantics of the program.
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlotOrReMat(SlotOrReMat);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+ SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+ (unsigned)CanClobber;
+
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Remembering RM#"
+ << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+ }
+
+ /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+ /// the value of the specified stackslot register if it desires. The
+ /// specified stack slot must be available in a physreg for this query to
+ /// make sense.
+ bool canClobberPhysRegForSS(int SlotOrReMat) const {
+ assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+ "Value not available!");
+ return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+ /// physical register where values for some stack slot(s) might be
+ /// available.
+ bool canClobberPhysReg(unsigned PhysReg) const {
+ std::multimap<unsigned, int>::const_iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ if (!canClobberPhysRegForSS(SlotOrReMat))
+ return false;
+ }
+ return true;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff that lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+ /// slot changes. This removes information about which register the
+ /// previous value for this slot lives in (as the previous value is dead
+ /// now).
+ void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+ /// AddAvailableRegsToLiveIn - Availability information is being kept coming
+ /// into the specified MBB. Add available physical registers as potential
+ /// live-in's. If they are reused in the MBB, they will be added to the
+ /// live-in set so that the register scavenger and the post-allocation
+ /// scheduler see correct liveness.
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+
+}
+
+// ************************************************************************ //
+
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+ MachineBasicBlock::iterator const Begin,
+ unsigned PhysReg,
+ const TargetRegisterInfo *TRI,
+ bool DoReMat,
+ int SSorRMId,
+ const TargetInstrInfo *TII,
+ const MachineFunction &MF)
+{
+ if (!ScheduleSpills)
+ return InsertLoc;
+
+ // Spill backscheduling is of primary interest to addresses, so
+ // don't do anything if the register isn't in the register class
+ // used for pointers.
+
+ const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+ if (!TL->isTypeLegal(TL->getPointerTy()))
+ // Believe it or not, this is true on PIC16.
+ return InsertLoc;
+
+ const TargetRegisterClass *ptrRegClass =
+ TL->getRegClassFor(TL->getPointerTy());
+ if (!ptrRegClass->contains(PhysReg))
+ return InsertLoc;
+
+ // Scan upwards through the preceding instructions. If an instruction doesn't
+ // reference the stack slot or the register we're loading, we can
+ // backschedule the reload up past it.
+ MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+ while (NewInsertLoc != Begin) {
+ MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+ for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+ MachineOperand &Op = Prev->getOperand(i);
+ if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+ goto stop;
+ }
+ if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+ Prev->findRegisterDefOperand(PhysReg))
+ goto stop;
+ for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+ if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+ Prev->findRegisterDefOperand(*Alias))
+ goto stop;
+ NewInsertLoc = Prev;
+ }
+stop:;
+
+ // If we made it to the beginning of the block, turn around and move back
+ // down just past any existing reloads. They're likely to be reloads/remats
+ // for instructions earlier than what our current reload/remat is for, so
+ // they should be scheduled earlier.
+ if (NewInsertLoc == Begin) {
+ int FrameIdx;
+ while (InsertLoc != NewInsertLoc &&
+ (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+ TII->isTriviallyReMaterializable(NewInsertLoc)))
+ ++NewInsertLoc;
+ }
+
+ return NewInsertLoc;
+}
+
+namespace {
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+ unsigned StackSlotOrReMat;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+ AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+/// is reused instead of reloaded.
+class ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+public:
+ ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+ PhysRegsClobbered.resize(tri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
+ MachineFunction &MF, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+ /// rejected for the reload. This avoids infinite looping in case like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+ /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+ return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+ }
+};
+
+}
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// InvalidateKill - Invalidate register kill information for a specific
+/// register. This also unsets the kills marker on the last kill operand.
+static void InvalidateKill(unsigned Reg,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg]) {
+ KillOps[Reg]->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOps[Reg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR]->setIsKill(false);
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+/// the def MI has other definition operands that are not dead; that result
+/// is reported through the HasLiveDef reference parameter.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
+ continue;
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ // That can't be right. Register is killed but not re-defined and it's
+ // being reused. Let's fix that.
+ KillOps[Reg]->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOps[Reg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ // Must be a def of a super-register. Its other sub-registers are no
+ // longer killed either.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ } else {
+ // Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
+ KillOps[SReg]->setIsKill(false);
+ unsigned KReg = KillOps[SReg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
+ KillOps[*SSR] = NULL;
+ RegKills.reset(*SSR);
+ }
+ }
+ }
+ }
+
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ // It also defines (or partially define) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+ const TargetInstrDesc &TID = ReMatDefMI->getDesc();
+ assert(TID.getNumDefs() == 1 &&
+ "Don't know how to remat instructions that define > 1 values!");
+#endif
+ TII->reMaterialize(MBB, MII, DestReg,
+ ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys && "Virtual register is not assigned a register?");
+ substitutePhysReg(MO, Phys, *TRI);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find the SubReg's super-register of given register class
+/// where its SubIdx sub-register is SubReg.
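+/// For example (using X86 names for illustration): given the GR32 register
+/// class, SubReg AX and the 16-bit sub-register index, this returns EAX,
+/// since EAX's 16-bit sub-register is AX.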
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n");
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ");
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+ else
+ DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is being kept as
+/// control flows into the specified MBB. Add available physical registers
+/// as potential live-ins. If they are reused in the MBB, they will be added
+/// to the live-in set so the register scavenger and post-allocation
+/// scheduler are aware of them.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ std::set<unsigned> NotAvailable;
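+ // Registers whose cached values cannot safely be reused in this block;
+ // they are clobbered after the scan below.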
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = I->first;
+ const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+ // FIXME: A temporary workaround. We can't reuse available value if it's
+ // not safe to move the def of the virtual register's class. e.g.
+ // X86::RFP* register classes. Do not add it as a live-in.
+ if (!TII->isSafeToMoveRegClassDefs(RC))
+ // This is no longer available.
+ NotAvailable.insert(Reg);
+ else {
+ MBB.addLiveIn(Reg);
+ InvalidateKill(Reg, TRI, RegKills, KillOps);
+ }
+
+ // Skip over the same register.
+ std::multimap<unsigned, int>::iterator NI = llvm::next(I);
+ while (NI != E && NI->first == Reg) {
+ ++I;
+ ++NI;
+ }
+ }
+
+ for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+ E = NotAvailable.end(); I != E; ++I) {
+ ClobberPhysReg(*I);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+ *SubRegs; ++SubRegs)
+ ClobberPhysReg(*SubRegs);
+ }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsOrReMatsAvailable.erase(It);
+
+ // This register may hold the value of multiple stack slots; only remove
+ // this stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == SlotOrReMat) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+// ************************** //
+// Reuse Info Implementation //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
+ unsigned PhysReg,
+ MachineFunction &MF,
+ MachineInstr *MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0 &&
+ RC->contains(Op.AssignedPhysReg)) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
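+ // Note that the recursion terminates: PhysReg was just added to
+ // Rejected, so each step here rejects one more register from a
+ // finite set.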
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ if (TRI->regsOverlap(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // MI may be using only a sub-register of PhysRegUsed.
+ unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
+ unsigned SubIdx = 0;
+ assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
+ "A reuse cannot be a virtual register");
+ if (PRRU != RealPhysRegUsed) {
+ // What was the sub-register index?
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
+ "Operand physreg is not a sub-register of PhysRegUsed");
+ }
+
+ // Ok, we're going to try to reload the assigned physreg into the
+ // slot that we were supposed to in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
+ MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat;
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
+ DoReMat, SSorRMId, TII, MF);
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
+ TRI, VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+
+ DEBUG(dbgs() << "Reuse undone!\n");
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+ // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM.hasPhys(Reg))
+ continue;
+ Reg = VRM.getPhys(Reg);
+ }
+ if (TRI->regsOverlap(PhysReg, Reg))
+ return false;
+ }
+ return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+ MachineBasicBlock &MBB,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ BitVector &AllocatableRegs) {
+ BitVector Defs(TRI->getNumRegs());
+ BitVector Uses(TRI->getNumRegs());
+ SmallVector<unsigned, 4> LocalUses;
+ SmallVector<unsigned, 4> Kills;
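+ // Defs and Uses accumulate the registers (and their aliases) written or
+ // read by the scanned instructions; a killed register is considered free
+ // only if it appears in neither set.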
+
+ // Take a look at 2 instructions at most.
+ for (unsigned Count = 0; Count < 2; ++Count) {
+ if (MII == MBB.begin())
+ break;
+ MachineInstr *PrevMI = prior(MII);
+ for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = PrevMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.set(*AS);
+ } else {
+ LocalUses.push_back(Reg);
+ if (MO.isKill() && AllocatableRegs[Reg])
+ Kills.push_back(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned Kill = Kills[i];
+ if (!Defs[Kill] && !Uses[Kill] &&
+ TRI->getPhysicalRegisterRegClass(Kill) == RC)
+ return Kill;
+ }
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.set(*AS);
+ }
+
+ MII = PrevMI;
+ }
+
+ return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == VirtReg)
+ substitutePhysReg(MO, PhysReg, TRI);
+ }
+}
+
+namespace {
+ struct RefSorter {
+ bool operator()(const std::pair<MachineInstr*, int> &A,
+ const std::pair<MachineInstr*, int> &B) {
+ return A.second < B.second;
+ }
+ };
+}
+
+// ***************************** //
+// Local Spiller Implementation //
+// ***************************** //
+
+namespace {
+
+class LocalRewriter : public VirtRegRewriter {
+ MachineRegisterInfo *RegInfo;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ BitVector AllocatableRegs;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+public:
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ RegInfo = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n");
+ DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+ " reloads!) ****\n");
+ DEBUG(MF.dump());
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // Keep track of kill information.
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SinglePredSuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+
+ // If this MBB is the only predecessor of a successor, keep the
+ // availability information and visit that successor next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+ // FIXME: There may be more than one successor, each of which has
+ // MBB as its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.dump());
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM.getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT)
+ for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
+ if (!VRM.isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ ++NumDSS;
+ }
+
+ return true;
+ }
+
+private:
+
+ /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+ /// a scratch register is available.
+ /// xorq %r12<kill>, %r13
+ /// addq %rax, -184(%rbp)
+ /// addq %r13, -184(%rbp)
+ /// ==>
+ /// xorq %r12<kill>, %r13
+ /// movq -184(%rbp), %r12
+ /// addq %rax, %r12
+ /// addq %r13, %r12
+ /// movq %r12, -184(%rbp)
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+ if (NextMII == MBB.end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+ // Now let's see if the last couple of instructions happen to have freed up
+ // a register.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // If the next instruction also folds the same SS modref and can be unfolded,
+ // then it's worthwhile to issue a load from SS into the free register and
+ // then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // Back-schedule reloads and remats.
+ ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF);
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM.transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = llvm::next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM.transferRestorePts(&NextMI, NewMIs[0]);
+ MBB.insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&NextMI);
+ MBB.erase(&NextMI);
+ ++NumModRefUnfold;
+ if (NextMII == MBB.end())
+ break;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+ }
+
+ /// OptimizeByUnfold - Turn a store folding instruction into a load folding
+ /// instruction. e.g.
+ /// xorl %edi, %eax
+ /// movl %eax, -32(%ebp)
+ /// movl -36(%ebp), %eax
+ /// orl %eax, -32(%ebp)
+ /// ==>
+ /// xorl %edi, %eax
+ /// orl -36(%ebp), %eax
+ /// mov %eax, -32(%ebp)
+ /// This enables unfolding optimization for a subsequent instruction which will
+ /// also eliminate the newly introduced store instruction.
+ bool OptimizeByUnfold(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ // MI2VirtMap can be updated, which would invalidate the iterator, so
+ // increment the iterator first.
+ ++I;
+ if (VRM.isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM.getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM);
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+ continue;
+ if (VRM.isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM.isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM.getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ if (VRM.hasPhys(VirtReg)) {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+ }
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ NewMIs.clear();
+ int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+ assert(Idx != -1);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(Idx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+ if (FoldedMI) {
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ if (!VRM.hasPhys(UnfoldVR))
+ VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = MBB.insert(MII, FoldedMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ MF.DeleteMachineInstr(NewMI);
+ return true;
+ }
+ MF.DeleteMachineInstr(NewMI);
+ }
+ }
+
+ return false;
+ }
+
+ /// CommuteChangesDestination - We are looking for r0 = op r1, r2 where
+ /// SrcReg is r1 and it is tied to r0. Return true if after commuting this
+ /// instruction it will be r0 = op r2, r1.
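+ /// On success DstIdx is set to the index of the operand (r2 above) that
+ /// becomes tied to the def once the instruction is commuted.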
+ static bool CommuteChangesDestination(MachineInstr *DefMI,
+ const TargetInstrDesc &TID,
+ unsigned SrcReg,
+ const TargetInstrInfo *TII,
+ unsigned &DstIdx) {
+ if (TID.getNumDefs() != 1 || TID.getNumOperands() != 3)
+ return false;
+ if (!DefMI->getOperand(1).isReg() ||
+ DefMI->getOperand(1).getReg() != SrcReg)
+ return false;
+ unsigned DefIdx;
+ if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+ return false;
+ unsigned SrcIdx1, SrcIdx2;
+ if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+ return false;
+ if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+ DstIdx = 2;
+ return true;
+ }
+ return false;
+ }
+
+ /// CommuteToFoldReload -
+ /// Look for
+ /// r1 = load fi#1
+ /// r1 = op r1, r2<kill>
+ /// store r1, fi#1
+ ///
+ /// If op is commutable and r2 is killed, then we can xform these to
+ /// r2 = op r2, fi#1
+ /// store r2, fi#1
+ bool CommuteToFoldReload(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator DefMII = prior(MII);
+ MachineInstr *DefMI = DefMII;
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (DefMII != MBB.begin() &&
+ TID.isCommutable() &&
+ CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+ return false;
+ MachineInstr *ReloadMI = prior(DefMII);
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+ if (DestReg != SrcReg || FrameIdx != SS)
+ return false;
+ int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+ if (UseIdx == -1)
+ return false;
+ unsigned DefIdx;
+ if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+ return false;
+ assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ // Not needed since foldMemoryOperand returns new MI.
+ MF.DeleteMachineInstr(CommutedMI);
+ if (!FoldedMI)
+ return false;
+
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = MBB.insert(MII, FoldedMI); // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(ReloadMI);
+ MBB.erase(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DefMI);
+ MBB.erase(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+
+ // If NewReg was previously holding value of some SS, it's now clobbered.
+ // This has to be done now because it's a physical register. When this
+ // instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+ /// the last store to the same slot is now dead. If so, remove the last store.
+ void SpillRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
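+ // Remember the instruction following MII so the store emitted below,
+ // which may expand to several instructions, can be found as
+ // prior(oldNextMII).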
+ TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB.begin();
+ if (CheckDef)
+ --PrevMII;
+ VRM.RemoveMachineInstrFromMaps(LastStore);
+ MBB.erase(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side effects.
+ VRM.RemoveMachineInstrFromMaps(DeadDef);
+ MBB.erase(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+ // the last of multiple instructions is the actual store.
+ LastStore = prior(oldNextMII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the value available in
+ // PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+ }
+
+ /// isSafeToDelete - Return true if this instruction doesn't produce any side
+ /// effect and all of its defs are dead.
+ static bool isSafeToDelete(MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.isBarrier() || TID.isReturn() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ // FIXME: We can't remove kill markers or else the scavenger will assert.
+ // An alternative is to add an ADD pseudo instruction to replace kill
+ // markers.
+ return false;
+ }
+ return true;
+ }
+
+ /// TransferDeadness - An identity copy definition is dead and it's being
+ /// removed. Find the last def or use and mark it as dead / kill.
+ void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+ unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
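+ // Collect every reference of Reg in this block that is no later than
+ // CurDist; they are visited bottom-up below.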
+ for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
+ RE = RegInfo->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end() || DI->second > CurDist)
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effect, delete it and propagate
+ // backward further. Otherwise, mark it dead and we are done.
+ if (!isSafeToDelete(*LastUDMI)) {
+ LastUD->setIsDead();
+ break;
+ }
+ VRM.RemoveMachineInstrFromMaps(LastUDMI);
+ MBB->erase(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+ }
+
+ /// RewriteMBB - Keep track of which spills are available even after the
+ /// register allocator is done with them. If possible, avoid reloading vregs.
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+ << MBB.getName() << "':\n");
+
+ MachineFunction &MF = *MBB.getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ unsigned Dist = 0;
+ DistanceMap.clear();
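+ // DistanceMap records how far each instruction is from the top of the
+ // block; TransferDeadness uses it to locate the last def or use of a
+ // register.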
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ bool Erased = false;
+ bool BackTracked = false;
+ if (OptimizeByUnfold(MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+ NextMII = llvm::next(MII);
+
+ MachineInstr &MI = *MII;
+
+ if (VRM.hasEmergencySpills(&MI)) {
+ // Spill physical register(s) in the rare case the allocator has run out
+ // of registers to allocate.
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM.getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ llvm_unreachable("Need to spill more than one physical registers!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(MII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false,
+ SS, TII, MF);
+
+ TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
+
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
+ }
+ NextMII = llvm::next(MII);
+ }
+
+ // Insert restores here if asked to.
+ if (VRM.isRestorePt(&MI)) {
+ std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+
+ // Check if the value being restored is available. If so, it must be
+ // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << '\n');
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << '\n');
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ // Mark it killed.
+ MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DEBUG(dbgs() << '\t' << *CopyMI);
+ ++NumCopified;
+ continue;
+ }
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ if (VRM.isReMaterialized(VirtReg)) {
+ ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(MII));
+ }
+ }
+
+ // Insert spills here if asked to.
+ if (VRM.isSpillPt(&MI)) {
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM.getSpillPtSpills(&MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ unsigned Phys = VRM.getPhys(VirtReg);
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ NextMII = llvm::next(MII);
+ }
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ RegInfo->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before so the scavenger knows it's "defined".
+ // FIXME: This is a horrible hack done by the register allocator to
+ // remat a definition with a virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+ }
+
+ // Process all of the spilled uses and all non-spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ MachineOperand &MO = MI.getOperand(i);
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MO.getSubReg();
+ if (VRM.isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+ if (MO.isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MO, Phys, *TRI);
+ if (VRM.isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MO.isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MO.isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+ // Note, this is for correctness reasons, e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG into target registers that overlap.
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM.getPhys(VirtReg)) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM.getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ unsigned DesignatedReg = VRM.getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.GetRegForReload(VirtReg,
+ DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ RegInfo->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+
+ MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(MII));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM.getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ RegInfo->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, MF);
+
+ if (DoReMat) {
+ ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, Dist++));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+
+
+ DEBUG(dbgs() << '\t' << MI);
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register.
+ SmallSet<int, 2> FoldedSS;
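+ // Remember the stack slots folded into MI; a value loaded from such a
+ // slot must not be marked clobberable when the defs are processed below.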
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
+
+ // MI2VirtMap can be updated, which would invalidate the iterator, so
+ // increment the iterator first.
+ ++I;
+ int SS = VRM.getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+ if (DestReg != InReg) {
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ unsigned SubIdx = DefMO->getSubReg();
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = &MI;
+ --NextMII; // backtrack to the copy.
+ NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ // Propagate the sub-register index over.
+ if (SubIdx) {
+ DefMO = NextMII->findRegisterDefOperand(DestReg);
+ DefMO->setSubReg(SubIdx);
+ }
+
+ // Mark it killed.
+ MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+
+ BackTracked = true;
+ } else {
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ // Unset last kill since it's being reused.
+ InvalidateKill(InReg, TRI, RegKills, KillOps);
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+ MBB.insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB.insert(MII, NewStore);
+ VRM.addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ if (!NewStore)
+ ++NumDSE;
+ }
+
+ MaybeDeadStores[SS] = NULL;
+ if (NewStore) {
+ // Treat this store as a spill merged into a copy. That makes the
+ // stack slot value available.
+ VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+ goto ProcessNextInst;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlotOrReMat(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+
+ if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+ Spills, RegKills, KillOps, TRI, VRM)) {
+ NextMII = llvm::next(MII);
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register
+ // available in SrcReg.
+ Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+ continue;
+
+ unsigned VirtReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ // Also check if it's copying from an "undef", if so, we can't
+ // eliminate this or else the undef marker is lost and it will
+ // confuses the scavenger. This is extremely rare.
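+ // e.g. a rewritten "mov %eax, %eax" (Src == Dst; sketch) carries no
+ // information and can simply be erased.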
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+ !MI.findRegisterUseOperand(Src)->isUndef()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(KillRegs[0] == Dst ||
+ TRI->isSubRegister(KillRegs[0], Dst) ||
+ TRI->isSuperRegister(KillRegs[0], Dst));
+ // Last def is now dead.
+ TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM);
+ }
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
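+ // e.g. (sketch) a reload "movl (slot), %eax" makes the slot value
+ // available in %eax, letting later reloads of the same slot be
+ // elided.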
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ unsigned SubIdx = MO.getSubReg();
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
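+ // e.g. (x86-flavored sketch) if the tied operand carries %ax and the
+ // def uses a sub-register index, the spill must be issued from the
+ // containing register %eax, found via findSuperReg below.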
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ }
+ }
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ RegInfo->setPhysRegUsed(PhysReg);
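+ // Rewrite the def operand to the concrete physical register, e.g.
+ // (sketch) PhysReg %eax with a low-16-bit SubIdx yields RReg %ax.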
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead()) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
+ NextMII = llvm::next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ {
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ }
+ ProcessNextInst:
+ // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ }
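+ // Record the instruction's distance into the block; DistanceMap is
+ // consulted by TransferDeadness above to find the nearest earlier
+ // def or use.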
+ if (!Erased)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+ }
+
+};
+
+}
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
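+ // Instantiate the rewriter implementation selected on the command
+ // line (the RewriterOpt option).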
+ switch (RewriterOpt) {
+ default: llvm_unreachable("Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 0000000..44f9df6
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,33 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "VirtRegMap.h"
+
+namespace llvm {
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+ /// createVirtRegRewriter - Create and return a rewriter object, as
+ /// specified on the command line.
+ VirtRegRewriter* createVirtRegRewriter();
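+ // Illustrative use (sketch; the caller owns the returned object):
+ //   VirtRegRewriter *RW = createVirtRegRewriter();
+ //   RW->runOnMachineFunction(MF, VRM, LIs);
+ //   delete RW;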
+
+}
+
+#endif