|  | //===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===// | 
|  | // | 
|  | // This file implements the llvm/CodeGen/InstrScheduling.h interface, along with | 
|  | // generic support routines for instruction scheduling. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "SchedPriorities.h" | 
|  | #include "llvm/CodeGen/MachineInstr.h" | 
|  | #include "llvm/CodeGen/MachineCodeForInstruction.h" | 
|  | #include "llvm/CodeGen/MachineCodeForMethod.h" | 
|  | #include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  | #include "llvm/BasicBlock.h" | 
|  | #include "llvm/Instruction.h" | 
|  | #include <algorithm> | 
|  | using std::cerr; | 
|  | using std::vector; | 
|  |  | 
|  | //************************* External Data Types *****************************/ | 
|  |  | 
|  | cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::Hidden, | 
|  | "enable instruction scheduling debugging information", | 
|  | clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"), | 
|  | clEnumValN(Sched_Disable,        "off", "disable instruction scheduling"), | 
|  | clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"), | 
|  | clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"), | 
|  | clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0); | 
|  |  | 
|  |  | 
|  | //************************* Internal Data Types *****************************/ | 
|  |  | 
|  | class InstrSchedule; | 
|  | class SchedulingManager; | 
|  |  | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // class InstrGroup: | 
|  | // | 
|  | // Represents a group of instructions scheduled to be issued | 
|  | // in a single cycle. | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | class InstrGroup: public NonCopyable { | 
|  | public: | 
|  | inline const SchedGraphNode* operator[](unsigned int slotNum) const { | 
|  | assert(slotNum  < group.size()); | 
|  | return group[slotNum]; | 
|  | } | 
|  |  | 
|  | private: | 
|  | friend class InstrSchedule; | 
|  |  | 
|  | inline void	addInstr(const SchedGraphNode* node, unsigned int slotNum) { | 
|  | assert(slotNum < group.size()); | 
|  | group[slotNum] = node; | 
|  | } | 
|  |  | 
|  | /*ctor*/	InstrGroup(unsigned int nslots) | 
|  | : group(nslots, NULL) {} | 
|  |  | 
|  | /*ctor*/	InstrGroup();		// disable: DO NOT IMPLEMENT | 
|  |  | 
|  | private: | 
|  | vector<const SchedGraphNode*> group; | 
|  | }; | 
|  |  | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // class ScheduleIterator: | 
|  | // | 
|  | // Iterates over the machine instructions in the for a single basic block. | 
|  | // The schedule is represented by an InstrSchedule object. | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | template<class _NodeType> | 
|  | class ScheduleIterator: public std::forward_iterator<_NodeType, ptrdiff_t> { | 
|  | private: | 
|  | unsigned cycleNum; | 
|  | unsigned slotNum; | 
|  | const InstrSchedule& S; | 
|  | public: | 
|  | typedef ScheduleIterator<_NodeType> _Self; | 
|  |  | 
|  | /*ctor*/ inline ScheduleIterator(const InstrSchedule& _schedule, | 
|  | unsigned _cycleNum, | 
|  | unsigned _slotNum) | 
|  | : cycleNum(_cycleNum), slotNum(_slotNum), S(_schedule) { | 
|  | skipToNextInstr(); | 
|  | } | 
|  |  | 
|  | /*ctor*/ inline ScheduleIterator(const _Self& x) | 
|  | : cycleNum(x.cycleNum), slotNum(x.slotNum), S(x.S) {} | 
|  |  | 
|  | inline bool operator==(const _Self& x) const { | 
|  | return (slotNum == x.slotNum && cycleNum== x.cycleNum && &S==&x.S); | 
|  | } | 
|  |  | 
|  | inline bool operator!=(const _Self& x) const { return !operator==(x); } | 
|  |  | 
|  | inline _NodeType* operator*() const { | 
|  | assert(cycleNum < S.groups.size()); | 
|  | return (*S.groups[cycleNum])[slotNum]; | 
|  | } | 
|  | inline _NodeType* operator->() const { return operator*(); } | 
|  |  | 
|  | _Self& operator++();				// Preincrement | 
|  | inline _Self operator++(int) {			// Postincrement | 
|  | _Self tmp(*this); ++*this; return tmp; | 
|  | } | 
|  |  | 
|  | static _Self begin(const InstrSchedule& _schedule); | 
|  | static _Self end(  const InstrSchedule& _schedule); | 
|  |  | 
|  | private: | 
|  | inline _Self& operator=(const _Self& x); // DISABLE -- DO NOT IMPLEMENT | 
|  | void	skipToNextInstr(); | 
|  | }; | 
|  |  | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // class InstrSchedule: | 
|  | // | 
|  | // Represents the schedule of machine instructions for a single basic block. | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | class InstrSchedule: public NonCopyable { | 
|  | private: | 
|  | const unsigned int nslots; | 
|  | unsigned int numInstr; | 
|  | vector<InstrGroup*> groups;		// indexed by cycle number | 
|  | vector<cycles_t> startTime;		// indexed by node id | 
|  |  | 
|  | public: // iterators | 
|  | typedef ScheduleIterator<SchedGraphNode> iterator; | 
|  | typedef ScheduleIterator<const SchedGraphNode> const_iterator; | 
|  |  | 
|  | iterator begin(); | 
|  | const_iterator begin() const; | 
|  | iterator end(); | 
|  | const_iterator end()   const; | 
|  |  | 
|  | public: // constructors and destructor | 
|  | /*ctor*/		InstrSchedule	(unsigned int _nslots, | 
|  | unsigned int _numNodes); | 
|  | /*dtor*/		~InstrSchedule	(); | 
|  |  | 
|  | public: // accessor functions to query chosen schedule | 
|  | const SchedGraphNode* getInstr	(unsigned int slotNum, | 
|  | cycles_t c) const { | 
|  | const InstrGroup* igroup = this->getIGroup(c); | 
|  | return (igroup == NULL)? NULL : (*igroup)[slotNum]; | 
|  | } | 
|  |  | 
|  | inline InstrGroup*	getIGroup	(cycles_t c) { | 
|  | if ((unsigned)c >= groups.size()) | 
|  | groups.resize(c+1); | 
|  | if (groups[c] == NULL) | 
|  | groups[c] = new InstrGroup(nslots); | 
|  | return groups[c]; | 
|  | } | 
|  |  | 
|  | inline const InstrGroup* getIGroup	(cycles_t c) const { | 
|  | assert((unsigned)c < groups.size()); | 
|  | return groups[c]; | 
|  | } | 
|  |  | 
|  | inline cycles_t	getStartTime	(unsigned int nodeId) const { | 
|  | assert(nodeId < startTime.size()); | 
|  | return startTime[nodeId]; | 
|  | } | 
|  |  | 
|  | unsigned int		getNumInstructions() const { | 
|  | return numInstr; | 
|  | } | 
|  |  | 
|  | inline void		scheduleInstr	(const SchedGraphNode* node, | 
|  | unsigned int slotNum, | 
|  | cycles_t cycle) { | 
|  | InstrGroup* igroup = this->getIGroup(cycle); | 
|  | assert((*igroup)[slotNum] == NULL &&  "Slot already filled?"); | 
|  | igroup->addInstr(node, slotNum); | 
|  | assert(node->getNodeId() < startTime.size()); | 
|  | startTime[node->getNodeId()] = cycle; | 
|  | ++numInstr; | 
|  | } | 
|  |  | 
|  | private: | 
|  | friend class iterator; | 
|  | friend class const_iterator; | 
|  | /*ctor*/	InstrSchedule	();	// Disable: DO NOT IMPLEMENT. | 
|  | }; | 
|  |  | 
|  |  | 
|  | /*ctor*/ | 
|  | InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes) | 
|  | : nslots(_nslots), | 
|  | numInstr(0), | 
|  | groups(2 * _numNodes / _nslots),		// 2 x lower-bound for #cycles | 
|  | startTime(_numNodes, (cycles_t) -1)		// set all to -1 | 
|  | { | 
|  | } | 
|  |  | 
|  |  | 
|  | /*dtor*/ | 
|  | InstrSchedule::~InstrSchedule() | 
|  | { | 
|  | for (unsigned c=0, NC=groups.size(); c < NC; c++) | 
|  | if (groups[c] != NULL) | 
|  | delete groups[c];			// delete InstrGroup objects | 
|  | } | 
|  |  | 
|  |  | 
|  | template<class _NodeType> | 
|  | inline | 
|  | void | 
|  | ScheduleIterator<_NodeType>::skipToNextInstr() | 
|  | { | 
|  | while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) | 
|  | ++cycleNum;			// skip cycles with no instructions | 
|  |  | 
|  | while (cycleNum < S.groups.size() && | 
|  | (*S.groups[cycleNum])[slotNum] == NULL) | 
|  | { | 
|  | ++slotNum; | 
|  | if (slotNum == S.nslots) | 
|  | { | 
|  | ++cycleNum; | 
|  | slotNum = 0; | 
|  | while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) | 
|  | ++cycleNum;			// skip cycles with no instructions | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | template<class _NodeType> | 
|  | inline | 
|  | ScheduleIterator<_NodeType>& | 
|  | ScheduleIterator<_NodeType>::operator++()	// Preincrement | 
|  | { | 
|  | ++slotNum; | 
|  | if (slotNum == S.nslots) | 
|  | { | 
|  | ++cycleNum; | 
|  | slotNum = 0; | 
|  | } | 
|  | skipToNextInstr(); | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | template<class _NodeType> | 
|  | ScheduleIterator<_NodeType> | 
|  | ScheduleIterator<_NodeType>::begin(const InstrSchedule& _schedule) | 
|  | { | 
|  | return _Self(_schedule, 0, 0); | 
|  | } | 
|  |  | 
|  | template<class _NodeType> | 
|  | ScheduleIterator<_NodeType> | 
|  | ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule) | 
|  | { | 
|  | return _Self(_schedule, _schedule.groups.size(), 0); | 
|  | } | 
|  |  | 
|  | InstrSchedule::iterator | 
|  | InstrSchedule::begin() | 
|  | { | 
|  | return iterator::begin(*this); | 
|  | } | 
|  |  | 
|  | InstrSchedule::const_iterator | 
|  | InstrSchedule::begin() const | 
|  | { | 
|  | return const_iterator::begin(*this); | 
|  | } | 
|  |  | 
|  | InstrSchedule::iterator | 
|  | InstrSchedule::end() | 
|  | { | 
|  | return iterator::end(*this); | 
|  | } | 
|  |  | 
|  | InstrSchedule::const_iterator | 
|  | InstrSchedule::end() const | 
|  | { | 
|  | return const_iterator::end(  *this); | 
|  | } | 
|  |  | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // class DelaySlotInfo: | 
|  | // | 
|  | // Record information about delay slots for a single branch instruction. | 
|  | // Delay slots are simply indexed by slot number 1 ... numDelaySlots | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | class DelaySlotInfo: public NonCopyable { | 
|  | private: | 
|  | const SchedGraphNode* brNode; | 
|  | unsigned int ndelays; | 
|  | vector<const SchedGraphNode*> delayNodeVec; | 
|  | cycles_t delayedNodeCycle; | 
|  | unsigned int delayedNodeSlotNum; | 
|  |  | 
|  | public: | 
|  | /*ctor*/	DelaySlotInfo		(const SchedGraphNode* _brNode, | 
|  | unsigned _ndelays) | 
|  | : brNode(_brNode), ndelays(_ndelays), | 
|  | delayedNodeCycle(0), delayedNodeSlotNum(0) {} | 
|  |  | 
|  | inline unsigned getNumDelays	() { | 
|  | return ndelays; | 
|  | } | 
|  |  | 
|  | inline const vector<const SchedGraphNode*>& getDelayNodeVec() { | 
|  | return delayNodeVec; | 
|  | } | 
|  |  | 
|  | inline void	addDelayNode		(const SchedGraphNode* node) { | 
|  | delayNodeVec.push_back(node); | 
|  | assert(delayNodeVec.size() <= ndelays && "Too many delay slot instrs!"); | 
|  | } | 
|  |  | 
|  | inline void	recordChosenSlot	(cycles_t cycle, unsigned slotNum) { | 
|  | delayedNodeCycle = cycle; | 
|  | delayedNodeSlotNum = slotNum; | 
|  | } | 
|  |  | 
|  | unsigned	scheduleDelayedNode	(SchedulingManager& S); | 
|  | }; | 
|  |  | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // class SchedulingManager: | 
|  | // | 
|  | // Represents the schedule of machine instructions for a single basic block. | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | class SchedulingManager: public NonCopyable { | 
|  | public: // publicly accessible data members | 
|  | const unsigned int nslots; | 
|  | const MachineSchedInfo& schedInfo; | 
|  | SchedPriorities& schedPrio; | 
|  | InstrSchedule isched; | 
|  |  | 
|  | private: | 
|  | unsigned int totalInstrCount; | 
|  | cycles_t curTime; | 
|  | cycles_t nextEarliestIssueTime;		// next cycle we can issue | 
|  | vector<std::hash_set<const SchedGraphNode*> > choicesForSlot; // indexed by slot# | 
|  | vector<const SchedGraphNode*> choiceVec;	// indexed by node ptr | 
|  | vector<int> numInClass;			// indexed by sched class | 
|  | vector<cycles_t> nextEarliestStartTime;	// indexed by opCode | 
|  | std::hash_map<const SchedGraphNode*, DelaySlotInfo*> delaySlotInfoForBranches; | 
|  | // indexed by branch node ptr | 
|  |  | 
|  | public: | 
|  | SchedulingManager(const TargetMachine& _target, const SchedGraph* graph, | 
|  | SchedPriorities& schedPrio); | 
|  | ~SchedulingManager() { | 
|  | for (std::hash_map<const SchedGraphNode*, | 
|  | DelaySlotInfo*>::iterator I = delaySlotInfoForBranches.begin(), | 
|  | E = delaySlotInfoForBranches.end(); I != E; ++I) | 
|  | delete I->second; | 
|  | } | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // Simplify access to the machine instruction info | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | inline const MachineInstrInfo& getInstrInfo	() const { | 
|  | return schedInfo.getInstrInfo(); | 
|  | } | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // Interface for checking and updating the current time | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | inline cycles_t	getTime			() const { | 
|  | return curTime; | 
|  | } | 
|  |  | 
|  | inline cycles_t	getEarliestIssueTime() const { | 
|  | return nextEarliestIssueTime; | 
|  | } | 
|  |  | 
|  | inline cycles_t	getEarliestStartTimeForOp(MachineOpCode opCode) const { | 
|  | assert(opCode < (int) nextEarliestStartTime.size()); | 
|  | return nextEarliestStartTime[opCode]; | 
|  | } | 
|  |  | 
|  | // Update current time to specified cycle | 
|  | inline void	updateTime		(cycles_t c) { | 
|  | curTime = c; | 
|  | schedPrio.updateTime(c); | 
|  | } | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // Functions to manage the choices for the current cycle including: | 
|  | // -- a vector of choices by priority (choiceVec) | 
|  | // -- vectors of the choices for each instruction slot (choicesForSlot[]) | 
|  | // -- number of choices in each sched class, used to check issue conflicts | 
|  | //    between choices for a single cycle | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | inline unsigned int getNumChoices	() const { | 
|  | return choiceVec.size(); | 
|  | } | 
|  |  | 
|  | inline unsigned getNumChoicesInClass	(const InstrSchedClass& sc) const { | 
|  | assert(sc < (int) numInClass.size() && "Invalid op code or sched class!"); | 
|  | return numInClass[sc]; | 
|  | } | 
|  |  | 
|  | inline const SchedGraphNode* getChoice(unsigned int i) const { | 
|  | // assert(i < choiceVec.size());	don't check here. | 
|  | return choiceVec[i]; | 
|  | } | 
|  |  | 
|  | inline std::hash_set<const SchedGraphNode*>& getChoicesForSlot(unsigned slotNum) { | 
|  | assert(slotNum < nslots); | 
|  | return choicesForSlot[slotNum]; | 
|  | } | 
|  |  | 
|  | inline void	addChoice		(const SchedGraphNode* node) { | 
|  | // Append the instruction to the vector of choices for current cycle. | 
|  | // Increment numInClass[c] for the sched class to which the instr belongs. | 
|  | choiceVec.push_back(node); | 
|  | const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode()); | 
|  | assert(sc < (int) numInClass.size()); | 
|  | numInClass[sc]++; | 
|  | } | 
|  |  | 
|  | inline void	addChoiceToSlot		(unsigned int slotNum, | 
|  | const SchedGraphNode* node) { | 
|  | // Add the instruction to the choice set for the specified slot | 
|  | assert(slotNum < nslots); | 
|  | choicesForSlot[slotNum].insert(node); | 
|  | } | 
|  |  | 
|  | inline void	resetChoices		() { | 
|  | choiceVec.clear(); | 
|  | for (unsigned int s=0; s < nslots; s++) | 
|  | choicesForSlot[s].clear(); | 
|  | for (unsigned int c=0; c < numInClass.size(); c++) | 
|  | numInClass[c] = 0; | 
|  | } | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // Code to query and manage the partial instruction schedule so far | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | inline unsigned int	getNumScheduled	() const { | 
|  | return isched.getNumInstructions(); | 
|  | } | 
|  |  | 
|  | inline unsigned int	getNumUnscheduled() const { | 
|  | return totalInstrCount - isched.getNumInstructions(); | 
|  | } | 
|  |  | 
|  | inline bool		isScheduled	(const SchedGraphNode* node) const { | 
|  | return (isched.getStartTime(node->getNodeId()) >= 0); | 
|  | } | 
|  |  | 
|  | inline void	scheduleInstr		(const SchedGraphNode* node, | 
|  | unsigned int slotNum, | 
|  | cycles_t cycle) | 
|  | { | 
|  | assert(! isScheduled(node) && "Instruction already scheduled?"); | 
|  |  | 
|  | // add the instruction to the schedule | 
|  | isched.scheduleInstr(node, slotNum, cycle); | 
|  |  | 
|  | // update the earliest start times of all nodes that conflict with `node' | 
|  | // and the next-earliest time anything can issue if `node' causes bubbles | 
|  | updateEarliestStartTimes(node, cycle); | 
|  |  | 
|  | // remove the instruction from the choice sets for all slots | 
|  | for (unsigned s=0; s < nslots; s++) | 
|  | choicesForSlot[s].erase(node); | 
|  |  | 
|  | // and decrement the instr count for the sched class to which it belongs | 
|  | const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode()); | 
|  | assert(sc < (int) numInClass.size()); | 
|  | numInClass[sc]--; | 
|  | } | 
|  |  | 
|  | //---------------------------------------------------------------------- | 
|  | // Create and retrieve delay slot info for delayed instructions | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn, | 
|  | bool createIfMissing=false) | 
|  | { | 
|  | std::hash_map<const SchedGraphNode*, DelaySlotInfo*>::const_iterator | 
|  | I = delaySlotInfoForBranches.find(bn); | 
|  | if (I != delaySlotInfoForBranches.end()) | 
|  | return I->second; | 
|  |  | 
|  | if (!createIfMissing) return 0; | 
|  |  | 
|  | DelaySlotInfo *dinfo = | 
|  | new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpCode())); | 
|  | return delaySlotInfoForBranches[bn] = dinfo; | 
|  | } | 
|  |  | 
|  | private: | 
|  | SchedulingManager();     // DISABLED: DO NOT IMPLEMENT | 
|  | void updateEarliestStartTimes(const SchedGraphNode* node, cycles_t schedTime); | 
|  | }; | 
|  |  | 
|  |  | 
|  | /*ctor*/ | 
|  | SchedulingManager::SchedulingManager(const TargetMachine& target, | 
|  | const SchedGraph* graph, | 
|  | SchedPriorities& _schedPrio) | 
|  | : nslots(target.getSchedInfo().getMaxNumIssueTotal()), | 
|  | schedInfo(target.getSchedInfo()), | 
|  | schedPrio(_schedPrio), | 
|  | isched(nslots, graph->getNumNodes()), | 
|  | totalInstrCount(graph->getNumNodes() - 2), | 
|  | nextEarliestIssueTime(0), | 
|  | choicesForSlot(nslots), | 
|  | numInClass(target.getSchedInfo().getNumSchedClasses(), 0),	// set all to 0 | 
|  | nextEarliestStartTime(target.getInstrInfo().getNumRealOpCodes(), | 
|  | (cycles_t) 0)				// set all to 0 | 
|  | { | 
|  | updateTime(0); | 
|  |  | 
|  | // Note that an upper bound on #choices for each slot is = nslots since | 
|  | // we use this vector to hold a feasible set of instructions, and more | 
|  | // would be infeasible. Reserve that much memory since it is probably small. | 
|  | for (unsigned int i=0; i < nslots; i++) | 
|  | choicesForSlot[i].resize(nslots); | 
|  | } | 
|  |  | 
|  |  | 
|  | void | 
|  | SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, | 
|  | cycles_t schedTime) | 
|  | { | 
|  | if (schedInfo.numBubblesAfter(node->getOpCode()) > 0) | 
|  | { // Update next earliest time before which *nothing* can issue. | 
|  | nextEarliestIssueTime = std::max(nextEarliestIssueTime, | 
|  | curTime + 1 + schedInfo.numBubblesAfter(node->getOpCode())); | 
|  | } | 
|  |  | 
|  | const vector<MachineOpCode>* | 
|  | conflictVec = schedInfo.getConflictList(node->getOpCode()); | 
|  |  | 
|  | if (conflictVec != NULL) | 
|  | for (unsigned i=0; i < conflictVec->size(); i++) | 
|  | { | 
|  | MachineOpCode toOp = (*conflictVec)[i]; | 
|  | cycles_t est = schedTime + schedInfo.getMinIssueGap(node->getOpCode(), | 
|  | toOp); | 
|  | assert(toOp < (int) nextEarliestStartTime.size()); | 
|  | if (nextEarliestStartTime[toOp] < est) | 
|  | nextEarliestStartTime[toOp] = est; | 
|  | } | 
|  | } | 
|  |  | 
|  | //************************* Internal Functions *****************************/ | 
|  |  | 
|  |  | 
|  | static void | 
|  | AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) | 
|  | { | 
|  | // find the slot to start from, in the current cycle | 
|  | unsigned int startSlot = 0; | 
|  | cycles_t curTime = S.getTime(); | 
|  |  | 
|  | assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); | 
|  |  | 
|  | // If only one instruction can be issued, do so. | 
|  | if (maxIssue == 1) | 
|  | for (unsigned s=startSlot; s < S.nslots; s++) | 
|  | if (S.getChoicesForSlot(s).size() > 0) | 
|  | {// found the one instruction | 
|  | S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Otherwise, choose from the choices for each slot | 
|  | // | 
|  | InstrGroup* igroup = S.isched.getIGroup(S.getTime()); | 
|  | assert(igroup != NULL && "Group creation failed?"); | 
|  |  | 
|  | // Find a slot that has only a single choice, and take it. | 
|  | // If all slots have 0 or multiple choices, pick the first slot with | 
|  | // choices and use its last instruction (just to avoid shifting the vector). | 
|  | unsigned numIssued; | 
|  | for (numIssued = 0; numIssued < maxIssue; numIssued++) | 
|  | { | 
|  | int chosenSlot = -1; | 
|  | for (unsigned s=startSlot; s < S.nslots; s++) | 
|  | if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) | 
|  | { | 
|  | chosenSlot = (int) s; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (chosenSlot == -1) | 
|  | for (unsigned s=startSlot; s < S.nslots; s++) | 
|  | if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) | 
|  | { | 
|  | chosenSlot = (int) s; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (chosenSlot != -1) | 
|  | { // Insert the chosen instr in the chosen slot and | 
|  | // erase it from all slots. | 
|  | const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); | 
|  | S.scheduleInstr(node, chosenSlot, curTime); | 
|  | } | 
|  | } | 
|  |  | 
|  | assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); | 
|  | } | 
|  |  | 
|  |  | 
|  | // | 
|  | // For now, just assume we are scheduling within a single basic block. | 
|  | // Get the machine instruction vector for the basic block and clear it, | 
|  | // then append instructions in scheduled order. | 
|  | // Also, re-insert the dummy PHI instructions that were at the beginning | 
|  | // of the basic block, since they are not part of the schedule. | 
|  | // | 
|  | static void | 
|  | RecordSchedule(const BasicBlock* bb, const SchedulingManager& S) | 
|  | { | 
|  | MachineCodeForBasicBlock& mvec = bb->getMachineInstrVec(); | 
|  | const MachineInstrInfo& mii = S.schedInfo.getInstrInfo(); | 
|  |  | 
|  | #ifndef NDEBUG | 
|  | // Lets make sure we didn't lose any instructions, except possibly | 
|  | // some NOPs from delay slots.  Also, PHIs are not included in the schedule. | 
|  | unsigned numInstr = 0; | 
|  | for (MachineCodeForBasicBlock::iterator I=mvec.begin(); I != mvec.end(); ++I) | 
|  | if (! mii.isNop((*I)->getOpCode()) && | 
|  | ! mii.isDummyPhiInstr((*I)->getOpCode())) | 
|  | ++numInstr; | 
|  | assert(S.isched.getNumInstructions() >= numInstr && | 
|  | "Lost some non-NOP instructions during scheduling!"); | 
|  | #endif | 
|  |  | 
|  | if (S.isched.getNumInstructions() == 0) | 
|  | return;				// empty basic block! | 
|  |  | 
|  | // First find the dummy instructions at the start of the basic block | 
|  | MachineCodeForBasicBlock::iterator I = mvec.begin(); | 
|  | for ( ; I != mvec.end(); ++I) | 
|  | if (! mii.isDummyPhiInstr((*I)->getOpCode())) | 
|  | break; | 
|  |  | 
|  | // Erase all except the dummy PHI instructions from mvec, and | 
|  | // pre-allocate create space for the ones we will put back in. | 
|  | mvec.erase(I, mvec.end()); | 
|  |  | 
|  | InstrSchedule::const_iterator NIend = S.isched.end(); | 
|  | for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI) | 
|  | mvec.push_back(const_cast<MachineInstr*>((*NI)->getMachineInstr())); | 
|  | } | 
|  |  | 
|  |  | 
|  |  | 
|  | static void | 
|  | MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) | 
|  | { | 
|  | // Check if any successors are now ready that were not already marked | 
|  | // ready before, and that have not yet been scheduled. | 
|  | // | 
|  | for (sg_succ_const_iterator SI = succ_begin(node); SI !=succ_end(node); ++SI) | 
|  | if (! (*SI)->isDummyNode() | 
|  | && ! S.isScheduled(*SI) | 
|  | && ! S.schedPrio.nodeIsReady(*SI)) | 
|  | {// successor not scheduled and not marked ready; check *its* preds. | 
|  |  | 
|  | bool succIsReady = true; | 
|  | for (sg_pred_const_iterator P=pred_begin(*SI); P != pred_end(*SI); ++P) | 
|  | if (! (*P)->isDummyNode() | 
|  | && ! S.isScheduled(*P)) | 
|  | { | 
|  | succIsReady = false; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (succIsReady)	// add the successor to the ready list | 
|  | S.schedPrio.insertReady(*SI); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | // Choose up to `nslots' FEASIBLE instructions and assign each | 
|  | // instruction to all possible slots that do not violate feasibility. | 
|  | // FEASIBLE means it should be guaranteed that the set | 
|  | // of chosen instructions can be issued in a single group. | 
|  | // | 
|  | // Return value: | 
|  | //	maxIssue : total number of feasible instructions | 
|  | //	S.choicesForSlot[i=0..nslots] : set of instructions feasible in slot i | 
|  | // | 
|  | static unsigned | 
|  | FindSlotChoices(SchedulingManager& S, | 
|  | DelaySlotInfo*& getDelaySlotInfo) | 
|  | { | 
|  | // initialize result vectors to empty | 
|  | S.resetChoices(); | 
|  |  | 
|  | // find the slot to start from, in the current cycle | 
|  | unsigned int startSlot = 0; | 
|  | InstrGroup* igroup = S.isched.getIGroup(S.getTime()); | 
|  | for (int s = S.nslots - 1; s >= 0; s--) | 
|  | if ((*igroup)[s] != NULL) | 
|  | { | 
|  | startSlot = s+1; | 
|  | break; | 
|  | } | 
|  |  | 
|  | // Make sure we pick at most one instruction that would break the group. | 
|  | // Also, if we do pick one, remember which it was. | 
|  | unsigned int indexForBreakingNode = S.nslots; | 
|  | unsigned int indexForDelayedInstr = S.nslots; | 
|  | DelaySlotInfo* delaySlotInfo = NULL; | 
|  |  | 
|  | getDelaySlotInfo = NULL; | 
|  |  | 
|  | // Choose instructions in order of priority. | 
|  | // Add choices to the choice vector in the SchedulingManager class as | 
|  | // we choose them so that subsequent choices will be correctly tested | 
|  | // for feasibility, w.r.t. higher priority choices for the same cycle. | 
|  | // | 
|  | while (S.getNumChoices() < S.nslots - startSlot) | 
|  | { | 
|  | const SchedGraphNode* nextNode=S.schedPrio.getNextHighest(S,S.getTime()); | 
|  | if (nextNode == NULL) | 
|  | break;			// no more instructions for this cycle | 
|  |  | 
|  | if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpCode()) > 0) | 
|  | { | 
|  | delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode); | 
|  | if (delaySlotInfo != NULL) | 
|  | { | 
|  | if (indexForBreakingNode < S.nslots) | 
|  | // cannot issue a delayed instr in the same cycle as one | 
|  | // that breaks the issue group or as another delayed instr | 
|  | nextNode = NULL; | 
|  | else | 
|  | indexForDelayedInstr = S.getNumChoices(); | 
|  | } | 
|  | } | 
|  | else if (S.schedInfo.breaksIssueGroup(nextNode->getOpCode())) | 
|  | { | 
|  | if (indexForBreakingNode < S.nslots) | 
|  | // have a breaking instruction already so throw this one away | 
|  | nextNode = NULL; | 
|  | else | 
|  | indexForBreakingNode = S.getNumChoices(); | 
|  | } | 
|  |  | 
|  | if (nextNode != NULL) | 
|  | { | 
|  | S.addChoice(nextNode); | 
|  |  | 
|  | if (S.schedInfo.isSingleIssue(nextNode->getOpCode())) | 
|  | { | 
|  | assert(S.getNumChoices() == 1 && | 
|  | "Prioritizer returned invalid instr for this cycle!"); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (indexForDelayedInstr < S.nslots) | 
|  | break;			// leave the rest for delay slots | 
|  | } | 
|  |  | 
|  | assert(S.getNumChoices() <= S.nslots); | 
|  | assert(! (indexForDelayedInstr < S.nslots && | 
|  | indexForBreakingNode < S.nslots) && "Cannot have both in a cycle"); | 
|  |  | 
|  | // Assign each chosen instruction to all possible slots for that instr. | 
|  | // But if only one instruction was chosen, put it only in the first | 
|  | // feasible slot; no more analysis will be needed. | 
|  | // | 
|  | if (indexForDelayedInstr >= S.nslots && | 
|  | indexForBreakingNode >= S.nslots) | 
|  | { // No instructions that break the issue group or that have delay slots. | 
|  | // This is the common case, so handle it separately for efficiency. | 
|  |  | 
|  | if (S.getNumChoices() == 1) | 
|  | { | 
|  | MachineOpCode opCode = S.getChoice(0)->getOpCode(); | 
|  | unsigned int s; | 
|  | for (s=startSlot; s < S.nslots; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | break; | 
|  | assert(s < S.nslots && "No feasible slot for this opCode?"); | 
|  | S.addChoiceToSlot(s, S.getChoice(0)); | 
|  | } | 
|  | else | 
|  | { | 
|  | for (unsigned i=0; i < S.getNumChoices(); i++) | 
|  | { | 
|  | MachineOpCode opCode = S.getChoice(i)->getOpCode(); | 
|  | for (unsigned int s=startSlot; s < S.nslots; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | S.addChoiceToSlot(s, S.getChoice(i)); | 
|  | } | 
|  | } | 
|  | } | 
|  | else if (indexForDelayedInstr < S.nslots) | 
|  | { | 
|  | // There is an instruction that needs delay slots. | 
|  | // Try to assign that instruction to a higher slot than any other | 
|  | // instructions in the group, so that its delay slots can go | 
|  | // right after it. | 
|  | // | 
|  |  | 
|  | assert(indexForDelayedInstr == S.getNumChoices() - 1 && | 
|  | "Instruction with delay slots should be last choice!"); | 
|  | assert(delaySlotInfo != NULL && "No delay slot info for instr?"); | 
|  |  | 
|  | const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr); | 
|  | MachineOpCode delayOpCode = delayedNode->getOpCode(); | 
|  | unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode); | 
|  |  | 
|  | unsigned delayedNodeSlot = S.nslots; | 
|  | int highestSlotUsed; | 
|  |  | 
|  | // Find the last possible slot for the delayed instruction that leaves | 
|  | // at least `d' slots vacant after it (d = #delay slots) | 
|  | for (int s = S.nslots-ndelays-1; s >= (int) startSlot; s--) | 
|  | if (S.schedInfo.instrCanUseSlot(delayOpCode, s)) | 
|  | { | 
|  | delayedNodeSlot = s; | 
|  | break; | 
|  | } | 
|  |  | 
|  | highestSlotUsed = -1; | 
|  | for (unsigned i=0; i < S.getNumChoices() - 1; i++) | 
|  | { | 
|  | // Try to assign every other instruction to a lower numbered | 
|  | // slot than delayedNodeSlot. | 
|  | MachineOpCode opCode =S.getChoice(i)->getOpCode(); | 
|  | bool noSlotFound = true; | 
|  | unsigned int s; | 
|  | for (s=startSlot; s < delayedNodeSlot; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | { | 
|  | S.addChoiceToSlot(s, S.getChoice(i)); | 
|  | noSlotFound = false; | 
|  | } | 
|  |  | 
|  | // No slot before `delayedNodeSlot' was found for this opCode | 
|  | // Use a later slot, and allow some delay slots to fall in | 
|  | // the next cycle. | 
|  | if (noSlotFound) | 
|  | for ( ; s < S.nslots; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | { | 
|  | S.addChoiceToSlot(s, S.getChoice(i)); | 
|  | break; | 
|  | } | 
|  |  | 
|  | assert(s < S.nslots && "No feasible slot for instruction?"); | 
|  |  | 
|  | highestSlotUsed = std::max(highestSlotUsed, (int) s); | 
|  | } | 
|  |  | 
|  | assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?"); | 
|  |  | 
|  | // We will put the delayed node in the first slot after the | 
|  | // highest slot used.  But we just mark that for now, and | 
|  | // schedule it separately because we want to schedule the delay | 
|  | // slots for the node at the same time. | 
|  | cycles_t dcycle = S.getTime(); | 
|  | unsigned int dslot = highestSlotUsed + 1; | 
|  | if (dslot == S.nslots) | 
|  | { | 
|  | dslot = 0; | 
|  | ++dcycle; | 
|  | } | 
|  | delaySlotInfo->recordChosenSlot(dcycle, dslot); | 
|  | getDelaySlotInfo = delaySlotInfo; | 
|  | } | 
|  | else | 
|  | { // There is an instruction that breaks the issue group. | 
|  | // For such an instruction, assign to the last possible slot in | 
|  | // the current group, and then don't assign any other instructions | 
|  | // to later slots. | 
|  | assert(indexForBreakingNode < S.nslots); | 
|  | const SchedGraphNode* breakingNode=S.getChoice(indexForBreakingNode); | 
|  | unsigned breakingSlot = INT_MAX; | 
|  | unsigned int nslotsToUse = S.nslots; | 
|  |  | 
|  | // Find the last possible slot for this instruction. | 
|  | for (int s = S.nslots-1; s >= (int) startSlot; s--) | 
|  | if (S.schedInfo.instrCanUseSlot(breakingNode->getOpCode(), s)) | 
|  | { | 
|  | breakingSlot = s; | 
|  | break; | 
|  | } | 
|  | assert(breakingSlot < S.nslots && | 
|  | "No feasible slot for `breakingNode'?"); | 
|  |  | 
|  | // Higher priority instructions than the one that breaks the group: | 
|  | // These can be assigned to all slots, but will be assigned only | 
|  | // to earlier slots if possible. | 
|  | for (unsigned i=0; | 
|  | i < S.getNumChoices() && i < indexForBreakingNode; i++) | 
|  | { | 
|  | MachineOpCode opCode =S.getChoice(i)->getOpCode(); | 
|  |  | 
|  | // If a higher priority instruction cannot be assigned to | 
|  | // any earlier slots, don't schedule the breaking instruction. | 
|  | // | 
|  | bool foundLowerSlot = false; | 
|  | nslotsToUse = S.nslots;	    // May be modified in the loop | 
|  | for (unsigned int s=startSlot; s < nslotsToUse; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | { | 
|  | if (breakingSlot < S.nslots && s < breakingSlot) | 
|  | { | 
|  | foundLowerSlot = true; | 
|  | nslotsToUse = breakingSlot; // RESETS LOOP UPPER BOUND! | 
|  | } | 
|  |  | 
|  | S.addChoiceToSlot(s, S.getChoice(i)); | 
|  | } | 
|  |  | 
|  | if (!foundLowerSlot) | 
|  | breakingSlot = INT_MAX;		// disable breaking instr | 
|  | } | 
|  |  | 
|  | // Assign the breaking instruction (if any) to a single slot | 
|  | // Otherwise, just ignore the instruction.  It will simply be | 
|  | // scheduled in a later cycle. | 
|  | if (breakingSlot < S.nslots) | 
|  | { | 
|  | S.addChoiceToSlot(breakingSlot, breakingNode); | 
|  | nslotsToUse = breakingSlot; | 
|  | } | 
|  | else | 
|  | nslotsToUse = S.nslots; | 
|  |  | 
|  | // For lower priority instructions than the one that breaks the | 
|  | // group, only assign them to slots lower than the breaking slot. | 
|  | // Otherwise, just ignore the instruction. | 
|  | for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) | 
|  | { | 
|  | MachineOpCode opCode = S.getChoice(i)->getOpCode(); | 
|  | for (unsigned int s=startSlot; s < nslotsToUse; s++) | 
|  | if (S.schedInfo.instrCanUseSlot(opCode, s)) | 
|  | S.addChoiceToSlot(s, S.getChoice(i)); | 
|  | } | 
|  | } // endif (no delay slots and no breaking slots) | 
|  |  | 
|  | return S.getNumChoices(); | 
|  | } | 
|  |  | 
|  |  | 
|  | static unsigned | 
|  | ChooseOneGroup(SchedulingManager& S) | 
|  | { | 
|  | assert(S.schedPrio.getNumReady() > 0 | 
|  | && "Don't get here without ready instructions."); | 
|  |  | 
|  | cycles_t firstCycle = S.getTime(); | 
|  | DelaySlotInfo* getDelaySlotInfo = NULL; | 
|  |  | 
|  | // Choose up to `nslots' feasible instructions and their possible slots. | 
|  | unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); | 
|  |  | 
|  | while (numIssued == 0) | 
|  | { | 
|  | S.updateTime(S.getTime()+1); | 
|  | numIssued = FindSlotChoices(S, getDelaySlotInfo); | 
|  | } | 
|  |  | 
|  | AssignInstructionsToSlots(S, numIssued); | 
|  |  | 
|  | if (getDelaySlotInfo != NULL) | 
|  | numIssued += getDelaySlotInfo->scheduleDelayedNode(S); | 
|  |  | 
|  | // Print trace of scheduled instructions before newly ready ones | 
|  | if (SchedDebugLevel >= Sched_PrintSchedTrace) | 
|  | { | 
|  | for (cycles_t c = firstCycle; c <= S.getTime(); c++) | 
|  | { | 
|  | cerr << "    Cycle " << (long)c << " : Scheduled instructions:\n"; | 
|  | const InstrGroup* igroup = S.isched.getIGroup(c); | 
|  | for (unsigned int s=0; s < S.nslots; s++) | 
|  | { | 
|  | cerr << "        "; | 
|  | if ((*igroup)[s] != NULL) | 
|  | cerr << * ((*igroup)[s])->getMachineInstr() << "\n"; | 
|  | else | 
|  | cerr << "<none>\n"; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return numIssued; | 
|  | } | 
|  |  | 
|  |  | 
|  | static void | 
|  | ForwardListSchedule(SchedulingManager& S) | 
|  | { | 
|  | unsigned N; | 
|  | const SchedGraphNode* node; | 
|  |  | 
|  | S.schedPrio.initialize(); | 
|  |  | 
|  | while ((N = S.schedPrio.getNumReady()) > 0) | 
|  | { | 
|  | cycles_t nextCycle = S.getTime(); | 
|  |  | 
|  | // Choose one group of instructions for a cycle, plus any delay slot | 
|  | // instructions (which may overflow into successive cycles). | 
|  | // This will advance S.getTime() to the last cycle in which | 
|  | // instructions are actually issued. | 
|  | // | 
|  | unsigned numIssued = ChooseOneGroup(S); | 
|  | assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); | 
|  |  | 
|  | // Notify the priority manager of scheduled instructions and mark | 
|  | // any successors that may now be ready | 
|  | // | 
|  | for (cycles_t c = nextCycle; c <= S.getTime(); c++) | 
|  | { | 
|  | const InstrGroup* igroup = S.isched.getIGroup(c); | 
|  | for (unsigned int s=0; s < S.nslots; s++) | 
|  | if ((node = (*igroup)[s]) != NULL) | 
|  | { | 
|  | S.schedPrio.issuedReadyNodeAt(S.getTime(), node); | 
|  | MarkSuccessorsReady(S, node); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Move to the next the next earliest cycle for which | 
|  | // an instruction can be issued, or the next earliest in which | 
|  | // one will be ready, or to the next cycle, whichever is latest. | 
|  | // | 
|  | S.updateTime(std::max(S.getTime() + 1, | 
|  | std::max(S.getEarliestIssueTime(), | 
|  | S.schedPrio.getEarliestReadyTime()))); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | //--------------------------------------------------------------------- | 
|  | // Code for filling delay slots for delayed terminator instructions | 
|  | // (e.g., BRANCH and RETURN).  Delay slots for non-terminator | 
|  | // instructions (e.g., CALL) are not handled here because they almost | 
|  | // always can be filled with instructions from the call sequence code | 
|  | // before a call.  That's preferable because we incur many tradeoffs here | 
|  | // when we cannot find single-cycle instructions that can be reordered. | 
|  | //---------------------------------------------------------------------- | 
|  |  | 
|  | static bool | 
|  | NodeCanFillDelaySlot(const SchedulingManager& S, | 
|  | const SchedGraphNode* node, | 
|  | const SchedGraphNode* brNode, | 
|  | bool nodeIsPredecessor) | 
|  | { | 
|  | assert(! node->isDummyNode()); | 
|  |  | 
|  | // don't put a branch in the delay slot of another branch | 
|  | if (S.getInstrInfo().isBranch(node->getOpCode())) | 
|  | return false; | 
|  |  | 
|  | // don't put a single-issue instruction in the delay slot of a branch | 
|  | if (S.schedInfo.isSingleIssue(node->getOpCode())) | 
|  | return false; | 
|  |  | 
|  | // don't put a load-use dependence in the delay slot of a branch | 
|  | const MachineInstrInfo& mii = S.getInstrInfo(); | 
|  |  | 
|  | for (SchedGraphNode::const_iterator EI = node->beginInEdges(); | 
|  | EI != node->endInEdges(); ++EI) | 
|  | if (! (*EI)->getSrc()->isDummyNode() | 
|  | && mii.isLoad((*EI)->getSrc()->getOpCode()) | 
|  | && (*EI)->getDepType() == SchedGraphEdge::CtrlDep) | 
|  | return false; | 
|  |  | 
|  | // for now, don't put an instruction that does not have operand | 
|  | // interlocks in the delay slot of a branch | 
|  | if (! S.getInstrInfo().hasOperandInterlock(node->getOpCode())) | 
|  | return false; | 
|  |  | 
|  | // Finally, if the instruction preceeds the branch, we make sure the | 
|  | // instruction can be reordered relative to the branch.  We simply check | 
|  | // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch. | 
|  | // | 
|  | if (nodeIsPredecessor) | 
|  | { | 
|  | bool onlyCDEdgeToBranch = true; | 
|  | for (SchedGraphNode::const_iterator OEI = node->beginOutEdges(); | 
|  | OEI != node->endOutEdges(); ++OEI) | 
|  | if (! (*OEI)->getSink()->isDummyNode() | 
|  | && ((*OEI)->getSink() != brNode | 
|  | || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep)) | 
|  | { | 
|  | onlyCDEdgeToBranch = false; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (!onlyCDEdgeToBranch) | 
|  | return false; | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  |  | 
|  | static void | 
|  | MarkNodeForDelaySlot(SchedulingManager& S, | 
|  | SchedGraph* graph, | 
|  | SchedGraphNode* node, | 
|  | const SchedGraphNode* brNode, | 
|  | bool nodeIsPredecessor) | 
|  | { | 
|  | if (nodeIsPredecessor) | 
|  | { // If node is in the same basic block (i.e., preceeds brNode), | 
|  | // remove it and all its incident edges from the graph.  Make sure we | 
|  | // add dummy edges for pred/succ nodes that become entry/exit nodes. | 
|  | graph->eraseIncidentEdges(node, /*addDummyEdges*/ true); | 
|  | } | 
|  | else | 
|  | { // If the node was from a target block, add the node to the graph | 
|  | // and add a CD edge from brNode to node. | 
|  | assert(0 && "NOT IMPLEMENTED YET"); | 
|  | } | 
|  |  | 
|  | DelaySlotInfo* dinfo = S.getDelaySlotInfoForInstr(brNode, /*create*/ true); | 
|  | dinfo->addDelayNode(node); | 
|  | } | 
|  |  | 
|  |  | 
|  | void | 
|  | FindUsefulInstructionsForDelaySlots(SchedulingManager& S, | 
|  | SchedGraphNode* brNode, | 
|  | vector<SchedGraphNode*>& sdelayNodeVec) | 
|  | { | 
|  | const MachineInstrInfo& mii = S.getInstrInfo(); | 
|  | unsigned ndelays = | 
|  | mii.getNumDelaySlots(brNode->getOpCode()); | 
|  |  | 
|  | if (ndelays == 0) | 
|  | return; | 
|  |  | 
|  | sdelayNodeVec.reserve(ndelays); | 
|  |  | 
|  | // Use a separate vector to hold the feasible multi-cycle nodes. | 
|  | // These will be used if not enough single-cycle nodes are found. | 
|  | // | 
|  | vector<SchedGraphNode*> mdelayNodeVec; | 
|  |  | 
|  | for (sg_pred_iterator P = pred_begin(brNode); | 
|  | P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) | 
|  | if (! (*P)->isDummyNode() && | 
|  | ! mii.isNop((*P)->getOpCode()) && | 
|  | NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) | 
|  | { | 
|  | if (mii.maxLatency((*P)->getOpCode()) > 1) | 
|  | mdelayNodeVec.push_back(*P); | 
|  | else | 
|  | sdelayNodeVec.push_back(*P); | 
|  | } | 
|  |  | 
|  | // If not enough single-cycle instructions were found, select the | 
|  | // lowest-latency multi-cycle instructions and use them. | 
|  | // Note that this is the most efficient code when only 1 (or even 2) | 
|  | // values need to be selected. | 
|  | // | 
|  | while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) | 
|  | { | 
|  | unsigned lmin = | 
|  | mii.maxLatency(mdelayNodeVec[0]->getOpCode()); | 
|  | unsigned minIndex   = 0; | 
|  | for (unsigned i=1; i < mdelayNodeVec.size(); i++) | 
|  | { | 
|  | unsigned li = | 
|  | mii.maxLatency(mdelayNodeVec[i]->getOpCode()); | 
|  | if (lmin >= li) | 
|  | { | 
|  | lmin = li; | 
|  | minIndex = i; | 
|  | } | 
|  | } | 
|  | sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); | 
|  | if (sdelayNodeVec.size() < ndelays) // avoid the last erase! | 
|  | mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | // Remove the NOPs currently in delay slots from the graph. | 
|  | // Mark instructions specified in sdelayNodeVec to replace them. | 
|  | // If not enough useful instructions were found, mark the NOPs to be used | 
|  | // for filling delay slots, otherwise, otherwise just discard them. | 
|  | // | 
|  | void | 
|  | ReplaceNopsWithUsefulInstr(SchedulingManager& S, | 
|  | SchedGraphNode* node, | 
|  | vector<SchedGraphNode*> sdelayNodeVec, | 
|  | SchedGraph* graph) | 
|  | { | 
|  | vector<SchedGraphNode*> nopNodeVec;   // this will hold unused NOPs | 
|  | const MachineInstrInfo& mii = S.getInstrInfo(); | 
|  | const MachineInstr* brInstr = node->getMachineInstr(); | 
|  | unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpCode()); | 
|  | assert(ndelays > 0 && "Unnecessary call to replace NOPs"); | 
|  |  | 
|  | // Remove the NOPs currently in delay slots from the graph. | 
|  | // If not enough useful instructions were found, use the NOPs to | 
|  | // fill delay slots, otherwise, just discard them. | 
|  | // | 
|  | unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1; | 
|  | MachineCodeForBasicBlock& bbMvec  = node->getBB()->getMachineInstrVec(); | 
|  | assert(bbMvec[firstDelaySlotIdx - 1] == brInstr && | 
|  | "Incorrect instr. index in basic block for brInstr"); | 
|  |  | 
|  | // First find all useful instructions already in the delay slots | 
|  | // and USE THEM.  We'll throw away the unused alternatives below | 
|  | // | 
|  | for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i) | 
|  | if (! mii.isNop(bbMvec[i]->getOpCode())) | 
|  | sdelayNodeVec.insert(sdelayNodeVec.begin(), | 
|  | graph->getGraphNodeForInstr(bbMvec[i])); | 
|  |  | 
|  | // Then find the NOPs and keep only as many as are needed. | 
|  | // Put the rest in nopNodeVec to be deleted. | 
|  | for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i) | 
|  | if (mii.isNop(bbMvec[i]->getOpCode())) | 
|  | if (sdelayNodeVec.size() < ndelays) | 
|  | sdelayNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i])); | 
|  | else | 
|  | nopNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i])); | 
|  |  | 
|  | assert(sdelayNodeVec.size() >= ndelays); | 
|  |  | 
|  | // If some delay slots were already filled, throw away that many new choices | 
|  | if (sdelayNodeVec.size() > ndelays) | 
|  | sdelayNodeVec.resize(ndelays); | 
|  |  | 
|  | // Mark the nodes chosen for delay slots.  This removes them from the graph. | 
|  | for (unsigned i=0; i < sdelayNodeVec.size(); i++) | 
|  | MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); | 
|  |  | 
|  | // And remove the unused NOPs from the graph. | 
|  | for (unsigned i=0; i < nopNodeVec.size(); i++) | 
|  | graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); | 
|  | } | 
|  |  | 
|  |  | 
|  | // For all delayed instructions, choose instructions to put in the delay | 
|  | // slots and pull those out of the graph.  Mark them for the delay slots | 
|  | // in the DelaySlotInfo object for that graph node.  If no useful work | 
|  | // is found for a delay slot, use the NOP that is currently in that slot. | 
|  | // | 
|  | // We try to fill the delay slots with useful work for all instructions | 
|  | // EXCEPT CALLS AND RETURNS. | 
|  | // For CALLs and RETURNs, it is nearly always possible to use one of the | 
|  | // call sequence instrs and putting anything else in the delay slot could be | 
|  | // suboptimal.  Also, it complicates generating the calling sequence code in | 
|  | // regalloc. | 
|  | // | 
|  | static void | 
|  | ChooseInstructionsForDelaySlots(SchedulingManager& S, | 
|  | const BasicBlock *bb, | 
|  | SchedGraph *graph) | 
|  | { | 
|  | const MachineInstrInfo& mii = S.getInstrInfo(); | 
|  | const Instruction *termInstr = (Instruction*)bb->getTerminator(); | 
|  | MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr); | 
|  | vector<SchedGraphNode*> delayNodeVec; | 
|  | const MachineInstr* brInstr = NULL; | 
|  |  | 
|  | if (termInstr->getOpcode() != Instruction::Ret) | 
|  | { | 
|  | // To find instructions that need delay slots without searching the full | 
|  | // machine code, we assume that the only delayed instructions are CALLs | 
|  | // or instructions generated for the terminator inst. | 
|  | // Find the first branch instr in the sequence of machine instrs for term | 
|  | // | 
|  | unsigned first = 0; | 
|  | while (first < termMvec.size() && | 
|  | ! mii.isBranch(termMvec[first]->getOpCode())) | 
|  | { | 
|  | ++first; | 
|  | } | 
|  | assert(first < termMvec.size() && | 
|  | "No branch instructions for BR?  Ok, but weird!  Delete assertion."); | 
|  |  | 
|  | brInstr = (first < termMvec.size())? termMvec[first] : NULL; | 
|  |  | 
|  | // Compute a vector of the nodes chosen for delay slots and then | 
|  | // mark delay slots to replace NOPs with these useful instructions. | 
|  | // | 
|  | if (brInstr != NULL) | 
|  | { | 
|  | SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); | 
|  | FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); | 
|  | ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Also mark delay slots for other delayed instructions to hold NOPs. | 
|  | // Simply passing in an empty delayNodeVec will have this effect. | 
|  | // | 
|  | delayNodeVec.clear(); | 
|  | const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec(); | 
|  | for (unsigned i=0; i < bbMvec.size(); i++) | 
|  | if (bbMvec[i] != brInstr && | 
|  | mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0) | 
|  | { | 
|  | SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]); | 
|  | ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | // | 
|  | // Schedule the delayed branch and its delay slots | 
|  | // | 
|  | unsigned | 
|  | DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) | 
|  | { | 
|  | assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); | 
|  | assert(S.isched.getInstr(delayedNodeSlotNum, delayedNodeCycle) == NULL | 
|  | && "Slot for branch should be empty"); | 
|  |  | 
|  | unsigned int nextSlot = delayedNodeSlotNum; | 
|  | cycles_t nextTime = delayedNodeCycle; | 
|  |  | 
|  | S.scheduleInstr(brNode, nextSlot, nextTime); | 
|  |  | 
|  | for (unsigned d=0; d < ndelays; d++) | 
|  | { | 
|  | ++nextSlot; | 
|  | if (nextSlot == S.nslots) | 
|  | { | 
|  | nextSlot = 0; | 
|  | nextTime++; | 
|  | } | 
|  |  | 
|  | // Find the first feasible instruction for this delay slot | 
|  | // Note that we only check for issue restrictions here. | 
|  | // We do *not* check for flow dependences but rely on pipeline | 
|  | // interlocks to resolve them.  Machines without interlocks | 
|  | // will require this code to be modified. | 
|  | for (unsigned i=0; i < delayNodeVec.size(); i++) | 
|  | { | 
|  | const SchedGraphNode* dnode = delayNodeVec[i]; | 
|  | if ( ! S.isScheduled(dnode) | 
|  | && S.schedInfo.instrCanUseSlot(dnode->getOpCode(), nextSlot) | 
|  | && instrIsFeasible(S, dnode->getOpCode())) | 
|  | { | 
|  | assert(S.getInstrInfo().hasOperandInterlock(dnode->getOpCode()) | 
|  | && "Instructions without interlocks not yet supported " | 
|  | "when filling branch delay slots"); | 
|  | S.scheduleInstr(dnode, nextSlot, nextTime); | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Update current time if delay slots overflowed into later cycles. | 
|  | // Do this here because we know exactly which cycle is the last cycle | 
|  | // that contains delay slots.  The next loop doesn't compute that. | 
|  | if (nextTime > S.getTime()) | 
|  | S.updateTime(nextTime); | 
|  |  | 
|  | // Now put any remaining instructions in the unfilled delay slots. | 
|  | // This could lead to suboptimal performance but needed for correctness. | 
|  | nextSlot = delayedNodeSlotNum; | 
|  | nextTime = delayedNodeCycle; | 
|  | for (unsigned i=0; i < delayNodeVec.size(); i++) | 
|  | if (! S.isScheduled(delayNodeVec[i])) | 
|  | { | 
|  | do { // find the next empty slot | 
|  | ++nextSlot; | 
|  | if (nextSlot == S.nslots) | 
|  | { | 
|  | nextSlot = 0; | 
|  | nextTime++; | 
|  | } | 
|  | } while (S.isched.getInstr(nextSlot, nextTime) != NULL); | 
|  |  | 
|  | S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); | 
|  | break; | 
|  | } | 
|  |  | 
|  | return 1 + ndelays; | 
|  | } | 
|  |  | 
|  |  | 
|  | // Check if the instruction would conflict with instructions already | 
|  | // chosen for the current cycle | 
|  | // | 
|  | static inline bool | 
|  | ConflictsWithChoices(const SchedulingManager& S, | 
|  | MachineOpCode opCode) | 
|  | { | 
|  | // Check if the instruction must issue by itself, and some feasible | 
|  | // choices have already been made for this cycle | 
|  | if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) | 
|  | return true; | 
|  |  | 
|  | // For each class that opCode belongs to, check if there are too many | 
|  | // instructions of that class. | 
|  | // | 
|  | const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); | 
|  | return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); | 
|  | } | 
|  |  | 
|  |  | 
|  | //************************* External Functions *****************************/ | 
|  |  | 
|  |  | 
|  | //--------------------------------------------------------------------------- | 
|  | // Function: ViolatesMinimumGap | 
|  | // | 
|  | // Purpose: | 
|  | //   Check minimum gap requirements relative to instructions scheduled in | 
|  | //   previous cycles. | 
|  | //   Note that we do not need to consider `nextEarliestIssueTime' here because | 
|  | //   that is also captured in the earliest start times for each opcode. | 
|  | //--------------------------------------------------------------------------- | 
|  |  | 
|  | static inline bool | 
|  | ViolatesMinimumGap(const SchedulingManager& S, | 
|  | MachineOpCode opCode, | 
|  | const cycles_t inCycle) | 
|  | { | 
|  | return (inCycle < S.getEarliestStartTimeForOp(opCode)); | 
|  | } | 
|  |  | 
|  |  | 
|  | //--------------------------------------------------------------------------- | 
|  | // Function: instrIsFeasible | 
|  | // | 
|  | // Purpose: | 
|  | //   Check if any issue restrictions would prevent the instruction from | 
|  | //   being issued in the current cycle | 
|  | //--------------------------------------------------------------------------- | 
|  |  | 
|  | bool | 
|  | instrIsFeasible(const SchedulingManager& S, | 
|  | MachineOpCode opCode) | 
|  | { | 
|  | // skip the instruction if it cannot be issued due to issue restrictions | 
|  | // caused by previously issued instructions | 
|  | if (ViolatesMinimumGap(S, opCode, S.getTime())) | 
|  | return false; | 
|  |  | 
|  | // skip the instruction if it cannot be issued due to issue restrictions | 
|  | // caused by previously chosen instructions for the current cycle | 
|  | if (ConflictsWithChoices(S, opCode)) | 
|  | return false; | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | //--------------------------------------------------------------------------- | 
|  | // Function: ScheduleInstructionsWithSSA | 
|  | // | 
|  | // Purpose: | 
|  | //   Entry point for instruction scheduling on SSA form. | 
|  | //   Schedules the machine instructions generated by instruction selection. | 
|  | //   Assumes that register allocation has not been done, i.e., operands | 
|  | //   are still in SSA form. | 
|  | //--------------------------------------------------------------------------- | 
|  |  | 
|  | namespace { | 
|  | class InstructionSchedulingWithSSA : public FunctionPass { | 
|  | const TargetMachine ⌖ | 
|  | public: | 
|  | inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {} | 
|  |  | 
|  | const char *getPassName() const { return "Instruction Scheduling"; } | 
|  |  | 
|  | // getAnalysisUsage - We use LiveVarInfo... | 
|  | virtual void getAnalysisUsage(AnalysisUsage &AU) const { | 
|  | AU.addRequired(FunctionLiveVarInfo::ID); | 
|  | } | 
|  |  | 
|  | bool runOnFunction(Function *F); | 
|  | }; | 
|  | } // end anonymous namespace | 
|  |  | 
|  |  | 
|  | bool | 
|  | InstructionSchedulingWithSSA::runOnFunction(Function *M) | 
|  | { | 
|  | if (SchedDebugLevel == Sched_Disable) | 
|  | return false; | 
|  |  | 
|  | SchedGraphSet graphSet(M, target); | 
|  |  | 
|  | if (SchedDebugLevel >= Sched_PrintSchedGraphs) | 
|  | { | 
|  | cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n"; | 
|  | graphSet.dump(); | 
|  | } | 
|  |  | 
|  | for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end(); | 
|  | GI != GE; ++GI) | 
|  | { | 
|  | SchedGraph* graph = (*GI); | 
|  | const vector<const BasicBlock*> &bbvec = graph->getBasicBlocks(); | 
|  | assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); | 
|  | const BasicBlock* bb = bbvec[0]; | 
|  |  | 
|  | if (SchedDebugLevel >= Sched_PrintSchedTrace) | 
|  | cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; | 
|  |  | 
|  | // expensive! | 
|  | SchedPriorities schedPrio(M, graph,getAnalysis<FunctionLiveVarInfo>()); | 
|  | SchedulingManager S(target, graph, schedPrio); | 
|  |  | 
|  | ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph | 
|  |  | 
|  | ForwardListSchedule(S);               // computes schedule in S | 
|  |  | 
|  | RecordSchedule(bb, S);                // records schedule in BB | 
|  | } | 
|  |  | 
|  | if (SchedDebugLevel >= Sched_PrintMachineCode) | 
|  | { | 
|  | cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n"; | 
|  | MachineCodeForMethod::get(M).dump(); | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  |  | 
|  | Pass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) { | 
|  | return new InstructionSchedulingWithSSA(tgt); | 
|  | } |