| //===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
/// This file declares the LoweringContext, PostIncrLoweringContext,
/// TargetLowering, TargetDataLowering, and TargetHeaderLowering classes.
/// TargetLowering is an abstract class used to drive the
/// translation/lowering process. LoweringContext maintains a context for
/// lowering each instruction, offering conveniences such as iterating over
/// non-deleted instructions. PostIncrLoweringContext is a helper that
/// advances a LoweringContext when it goes out of scope. TargetDataLowering
/// is an abstract class used to drive the lowering/emission of global
/// initializers, external global declarations, and internal constant pools.
/// TargetHeaderLowering writes out the target-specific header attributes of
/// an output file.
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef SUBZERO_SRC_ICETARGETLOWERING_H |
| #define SUBZERO_SRC_ICETARGETLOWERING_H |
| |
| #include "IceDefs.h" |
| #include "IceInst.h" // for the names of the Inst subtypes |
| #include "IceOperand.h" |
| #include "IceTypes.h" |
| |
| namespace Ice { |
| |
| /// LoweringContext makes it easy to iterate through non-deleted |
| /// instructions in a node, and insert new (lowered) instructions at |
| /// the current point. Along with the instruction list container and |
| /// associated iterators, it holds the current node, which is needed |
| /// when inserting new instructions in order to track whether variables |
| /// are used as single-block or multi-block. |
| class LoweringContext { |
| LoweringContext(const LoweringContext &) = delete; |
| LoweringContext &operator=(const LoweringContext &) = delete; |
| |
| public: |
| LoweringContext() = default; |
| ~LoweringContext() = default; |
| void init(CfgNode *Node); |
| Inst *getNextInst() const { |
| if (Next == End) |
| return nullptr; |
| return Next; |
| } |
| Inst *getNextInst(InstList::iterator &Iter) const { |
| advanceForward(Iter); |
| if (Iter == End) |
| return nullptr; |
| return Iter; |
| } |
| CfgNode *getNode() const { return Node; } |
| bool atEnd() const { return Cur == End; } |
| InstList::iterator getCur() const { return Cur; } |
| InstList::iterator getNext() const { return Next; } |
| InstList::iterator getEnd() const { return End; } |
| void insert(Inst *Inst); |
| Inst *getLastInserted() const; |
| void advanceCur() { Cur = Next; } |
| void advanceNext() { advanceForward(Next); } |
| void setCur(InstList::iterator C) { Cur = C; } |
| void setNext(InstList::iterator N) { Next = N; } |
| void rewind(); |
| void setInsertPoint(const InstList::iterator &Position) { Next = Position; } |
| |
| private: |
| /// Node is the argument to Inst::updateVars(). |
| CfgNode *Node = nullptr; |
| Inst *LastInserted = nullptr; |
| /// Cur points to the current instruction being considered. It is |
| /// guaranteed to point to a non-deleted instruction, or to be End. |
| InstList::iterator Cur; |
| /// Next doubles as a pointer to the next valid instruction (if any), |
| /// and the new-instruction insertion point. It is also updated for |
| /// the caller in case the lowering consumes more than one high-level |
| /// instruction. It is guaranteed to point to a non-deleted |
| /// instruction after Cur, or to be End. TODO: Consider separating |
| /// the notion of "next valid instruction" and "new instruction |
| /// insertion point", to avoid confusion when previously-deleted |
| /// instructions come between the two points. |
| InstList::iterator Next; |
| /// Begin is a copy of Insts.begin(), used if iterators are moved backward. |
| InstList::iterator Begin; |
| /// End is a copy of Insts.end(), used if Next needs to be advanced. |
| InstList::iterator End; |
| |
| void skipDeleted(InstList::iterator &I) const; |
| void advanceForward(InstList::iterator &I) const; |
| }; |
| |
| /// A helper class to advance the LoweringContext at each loop iteration. |
| class PostIncrLoweringContext { |
| PostIncrLoweringContext() = delete; |
| PostIncrLoweringContext(const PostIncrLoweringContext &) = delete; |
| PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete; |
| |
| public: |
| explicit PostIncrLoweringContext(LoweringContext &Context) |
| : Context(Context) {} |
| ~PostIncrLoweringContext() { |
| Context.advanceCur(); |
| Context.advanceNext(); |
| } |
| |
| private: |
| LoweringContext &Context; |
| }; |
| |
| class TargetLowering { |
| TargetLowering() = delete; |
| TargetLowering(const TargetLowering &) = delete; |
| TargetLowering &operator=(const TargetLowering &) = delete; |
| |
| public: |
| // TODO(jvoung): return a unique_ptr like the other factory functions. |
| static TargetLowering *createLowering(TargetArch Target, Cfg *Func); |
| static std::unique_ptr<Assembler> createAssembler(TargetArch Target, |
| Cfg *Func); |
| void translate() { |
| switch (Ctx->getFlags().getOptLevel()) { |
| case Opt_m1: |
| translateOm1(); |
| break; |
| case Opt_0: |
| translateO0(); |
| break; |
| case Opt_1: |
| translateO1(); |
| break; |
| case Opt_2: |
| translateO2(); |
| break; |
| } |
| } |
| virtual void translateOm1() { |
| Func->setError("Target doesn't specify Om1 lowering steps."); |
| } |
| virtual void translateO0() { |
| Func->setError("Target doesn't specify O0 lowering steps."); |
| } |
| virtual void translateO1() { |
| Func->setError("Target doesn't specify O1 lowering steps."); |
| } |
| virtual void translateO2() { |
| Func->setError("Target doesn't specify O2 lowering steps."); |
| } |
| |
| /// Tries to do address mode optimization on a single instruction. |
| void doAddressOpt(); |
| /// Randomly insert NOPs. |
| void doNopInsertion(); |
| /// Lowers a single non-Phi instruction. |
| void lower(); |
| /// Inserts and lowers a single high-level instruction at a specific insertion |
| /// point. |
| void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr); |
| /// Does preliminary lowering of the set of Phi instructions in the |
| /// current node. The main intention is to do what's needed to keep |
| /// the unlowered Phi instructions consistent with the lowered |
| /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit |
| /// target. |
| virtual void prelowerPhis() {} |
| /// Tries to do branch optimization on a single instruction. Returns |
| /// true if some optimization was done. |
| virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) { |
| return false; |
| } |
| |
| virtual SizeT getNumRegisters() const = 0; |
| /// Returns a variable pre-colored to the specified physical |
| /// register. This is generally used to get very direct access to |
| /// the register such as in the prolog or epilog or for marking |
| /// scratch registers as killed by a call. If a Type is not |
| /// provided, a target-specific default type is used. |
| virtual Variable *getPhysicalRegister(SizeT RegNum, |
| Type Ty = IceType_void) = 0; |
| /// Returns a printable name for the register. |
| virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0; |
| |
| virtual bool hasFramePointer() const { return false; } |
| virtual SizeT getFrameOrStackReg() const = 0; |
| virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0; |
| |
| bool hasComputedFrame() const { return HasComputedFrame; } |
| /// Returns true if this function calls a function that has the |
| /// "returns twice" attribute. |
| bool callsReturnsTwice() const { return CallsReturnsTwice; } |
| void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; } |
| int32_t getStackAdjustment() const { return StackAdjustment; } |
| void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; } |
| void resetStackAdjustment() { StackAdjustment = 0; } |
| SizeT makeNextLabelNumber() { return NextLabelNumber++; } |
| SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; } |
| LoweringContext &getContext() { return Context; } |
| |
| enum RegSet { |
| RegSet_None = 0, |
| RegSet_CallerSave = 1 << 0, |
| RegSet_CalleeSave = 1 << 1, |
| RegSet_StackPointer = 1 << 2, |
| RegSet_FramePointer = 1 << 3, |
| RegSet_All = ~RegSet_None |
| }; |
| typedef uint32_t RegSetMask; |
| |
| virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include, |
| RegSetMask Exclude) const = 0; |
| virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0; |
| void regAlloc(RegAllocKind Kind); |
| |
| virtual void makeRandomRegisterPermutation( |
| llvm::SmallVectorImpl<int32_t> &Permutation, |
| const llvm::SmallBitVector &ExcludeRegisters) const = 0; |
| |
| /// Save/restore any mutable state for the situation where code |
| /// emission needs multiple passes, such as sandboxing or relaxation. |
| /// Subclasses may provide their own implementation, but should be |
| /// sure to also call the parent class's methods. |
| virtual void snapshotEmitState() { |
| SnapshotStackAdjustment = StackAdjustment; |
| } |
| virtual void rollbackEmitState() { |
| StackAdjustment = SnapshotStackAdjustment; |
| } |
| |
| /// Get the minimum number of clusters required for a jump table to be |
| /// considered. |
| virtual SizeT getMinJumpTableSize() const = 0; |
| virtual void emitJumpTable(const Cfg *Func, |
| const InstJumpTable *JumpTable) const = 0; |
| |
| virtual void emitVariable(const Variable *Var) const = 0; |
| |
| void emitWithoutPrefix(const ConstantRelocatable *CR) const; |
| void emit(const ConstantRelocatable *CR) const; |
| virtual const char *getConstantPrefix() const = 0; |
| |
| virtual void emit(const ConstantUndef *C) const = 0; |
| virtual void emit(const ConstantInteger32 *C) const = 0; |
| virtual void emit(const ConstantInteger64 *C) const = 0; |
| virtual void emit(const ConstantFloat *C) const = 0; |
| virtual void emit(const ConstantDouble *C) const = 0; |
| |
| /// Performs target-specific argument lowering. |
| virtual void lowerArguments() = 0; |
| |
| virtual void initNodeForLowering(CfgNode *) {} |
| virtual void addProlog(CfgNode *Node) = 0; |
| virtual void addEpilog(CfgNode *Node) = 0; |
| |
| virtual ~TargetLowering() = default; |
| |
| protected: |
| explicit TargetLowering(Cfg *Func); |
| virtual void lowerAlloca(const InstAlloca *Inst) = 0; |
| virtual void lowerArithmetic(const InstArithmetic *Inst) = 0; |
| virtual void lowerAssign(const InstAssign *Inst) = 0; |
| virtual void lowerBr(const InstBr *Inst) = 0; |
| virtual void lowerCall(const InstCall *Inst) = 0; |
| virtual void lowerCast(const InstCast *Inst) = 0; |
| virtual void lowerFcmp(const InstFcmp *Inst) = 0; |
| virtual void lowerExtractElement(const InstExtractElement *Inst) = 0; |
| virtual void lowerIcmp(const InstIcmp *Inst) = 0; |
| virtual void lowerInsertElement(const InstInsertElement *Inst) = 0; |
| virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0; |
| virtual void lowerLoad(const InstLoad *Inst) = 0; |
| virtual void lowerPhi(const InstPhi *Inst) = 0; |
| virtual void lowerRet(const InstRet *Inst) = 0; |
| virtual void lowerSelect(const InstSelect *Inst) = 0; |
| virtual void lowerStore(const InstStore *Inst) = 0; |
| virtual void lowerSwitch(const InstSwitch *Inst) = 0; |
| virtual void lowerUnreachable(const InstUnreachable *Inst) = 0; |
| virtual void lowerOther(const Inst *Instr); |
| |
| virtual void doAddressOptLoad() {} |
| virtual void doAddressOptStore() {} |
| virtual void randomlyInsertNop(float Probability) = 0; |
| /// This gives the target an opportunity to post-process the lowered |
| /// expansion before returning. |
| virtual void postLower() {} |
| |
| /// Find two-address non-SSA instructions and set the DestNonKillable flag |
| /// to keep liveness analysis consistent. |
| void inferTwoAddress(); |
| |
| /// Make a pass over the Cfg to determine which variables need stack slots |
| /// and place them in a sorted list (SortedSpilledVariables). Among those, |
| /// vars, classify the spill variables as local to the basic block vs |
| /// global (multi-block) in order to compute the parameters GlobalsSize |
| /// and SpillAreaSizeBytes (represents locals or general vars if the |
| /// coalescing of locals is disallowed) along with alignments required |
| /// for variables in each area. We rely on accurate VMetadata in order to |
| /// classify a variable as global vs local (otherwise the variable is |
| /// conservatively global). The in-args should be initialized to 0. |
| /// |
| /// This is only a pre-pass and the actual stack slot assignment is |
| /// handled separately. |
| /// |
| /// There may be target-specific Variable types, which will be handled |
| /// by TargetVarHook. If the TargetVarHook returns true, then the variable |
| /// is skipped and not considered with the rest of the spilled variables. |
| void getVarStackSlotParams(VarList &SortedSpilledVariables, |
| llvm::SmallBitVector &RegsUsed, |
| size_t *GlobalsSize, size_t *SpillAreaSizeBytes, |
| uint32_t *SpillAreaAlignmentBytes, |
| uint32_t *LocalsSlotsAlignmentBytes, |
| std::function<bool(Variable *)> TargetVarHook); |
| |
| /// Calculate the amount of padding needed to align the local and global |
| /// areas to the required alignment. This assumes the globals/locals layout |
| /// used by getVarStackSlotParams and assignVarStackSlots. |
| void alignStackSpillAreas(uint32_t SpillAreaStartOffset, |
| uint32_t SpillAreaAlignmentBytes, |
| size_t GlobalsSize, |
| uint32_t LocalsSlotsAlignmentBytes, |
| uint32_t *SpillAreaPaddingBytes, |
| uint32_t *LocalsSlotsPaddingBytes); |
| |
| /// Make a pass through the SortedSpilledVariables and actually assign |
| /// stack slots. SpillAreaPaddingBytes takes into account stack alignment |
| /// padding. The SpillArea starts after that amount of padding. |
| /// This matches the scheme in getVarStackSlotParams, where there may |
| /// be a separate multi-block global var spill area and a local var |
| /// spill area. |
| void assignVarStackSlots(VarList &SortedSpilledVariables, |
| size_t SpillAreaPaddingBytes, |
| size_t SpillAreaSizeBytes, |
| size_t GlobalsAndSubsequentPaddingSize, |
| bool UsesFramePointer); |
| |
| /// Sort the variables in Source based on required alignment. |
| /// The variables with the largest alignment need are placed in the front |
| /// of the Dest list. |
| void sortVarsByAlignment(VarList &Dest, const VarList &Source) const; |
| |
| /// Make a call to an external helper function. |
| InstCall *makeHelperCall(const IceString &Name, Variable *Dest, |
| SizeT MaxSrcs); |
| |
| void |
| _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) { |
| Context.insert(InstBundleLock::create(Func, BundleOption)); |
| } |
| void _bundle_unlock() { Context.insert(InstBundleUnlock::create(Func)); } |
| void _set_dest_nonkillable() { |
| Context.getLastInserted()->setDestNonKillable(); |
| } |
| |
| bool shouldOptimizeMemIntrins(); |
| |
| Cfg *Func; |
| GlobalContext *Ctx; |
| bool HasComputedFrame = false; |
| bool CallsReturnsTwice = false; |
| /// StackAdjustment keeps track of the current stack offset from its |
| /// natural location, as arguments are pushed for a function call. |
| int32_t StackAdjustment = 0; |
| SizeT NextLabelNumber = 0; |
| SizeT NextJumpTableNumber = 0; |
| LoweringContext Context; |
| |
| // Runtime helper function names |
| const static constexpr char *H_bitcast_16xi1_i16 = "__Sz_bitcast_16xi1_i16"; |
| const static constexpr char *H_bitcast_8xi1_i8 = "__Sz_bitcast_8xi1_i8"; |
| const static constexpr char *H_bitcast_i16_16xi1 = "__Sz_bitcast_i16_16xi1"; |
| const static constexpr char *H_bitcast_i8_8xi1 = "__Sz_bitcast_i8_8xi1"; |
| const static constexpr char *H_call_ctpop_i32 = "__popcountsi2"; |
| const static constexpr char *H_call_ctpop_i64 = "__popcountdi2"; |
| const static constexpr char *H_call_longjmp = "longjmp"; |
| const static constexpr char *H_call_memcpy = "memcpy"; |
| const static constexpr char *H_call_memmove = "memmove"; |
| const static constexpr char *H_call_memset = "memset"; |
| const static constexpr char *H_call_read_tp = "__nacl_read_tp"; |
| const static constexpr char *H_call_setjmp = "setjmp"; |
| const static constexpr char *H_fptosi_f32_i64 = "__Sz_fptosi_f32_i64"; |
| const static constexpr char *H_fptosi_f64_i64 = "__Sz_fptosi_f64_i64"; |
| const static constexpr char *H_fptoui_4xi32_f32 = "__Sz_fptoui_4xi32_f32"; |
| const static constexpr char *H_fptoui_f32_i32 = "__Sz_fptoui_f32_i32"; |
| const static constexpr char *H_fptoui_f32_i64 = "__Sz_fptoui_f32_i64"; |
| const static constexpr char *H_fptoui_f64_i32 = "__Sz_fptoui_f64_i32"; |
| const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64"; |
| const static constexpr char *H_frem_f32 = "fmodf"; |
| const static constexpr char *H_frem_f64 = "fmod"; |
| const static constexpr char *H_sdiv_i32 = "__divsi3"; |
| const static constexpr char *H_sdiv_i64 = "__divdi3"; |
| const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32"; |
| const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64"; |
| const static constexpr char *H_srem_i32 = "__modsi3"; |
| const static constexpr char *H_srem_i64 = "__moddi3"; |
| const static constexpr char *H_udiv_i32 = "__udivsi3"; |
| const static constexpr char *H_udiv_i64 = "__udivdi3"; |
| const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32"; |
| const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32"; |
| const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64"; |
| const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32"; |
| const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64"; |
| const static constexpr char *H_urem_i32 = "__umodsi3"; |
| const static constexpr char *H_urem_i64 = "__umoddi3"; |
| |
| private: |
| int32_t SnapshotStackAdjustment = 0; |
| }; |
| |
| /// TargetDataLowering is used for "lowering" data including initializers |
| /// for global variables, and the internal constant pools. It is separated |
| /// out from TargetLowering because it does not require a Cfg. |
| class TargetDataLowering { |
| TargetDataLowering() = delete; |
| TargetDataLowering(const TargetDataLowering &) = delete; |
| TargetDataLowering &operator=(const TargetDataLowering &) = delete; |
| |
| public: |
| static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx); |
| virtual ~TargetDataLowering(); |
| |
| virtual void lowerGlobals(const VariableDeclarationList &Vars, |
| const IceString &SectionSuffix) = 0; |
| virtual void lowerConstants() = 0; |
| virtual void lowerJumpTables() = 0; |
| |
| protected: |
| void emitGlobal(const VariableDeclaration &Var, |
| const IceString &SectionSuffix); |
| |
| /// For now, we assume .long is the right directive for emitting 4 byte |
| /// emit global relocations. However, LLVM MIPS usually uses .4byte instead. |
| /// Perhaps there is some difference when the location is unaligned. |
| static const char *getEmit32Directive() { return ".long"; } |
| |
| explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {} |
| GlobalContext *Ctx; |
| }; |
| |
| /// TargetHeaderLowering is used to "lower" the header of an output file. |
| /// It writes out the target-specific header attributes. E.g., for ARM |
| /// this writes out the build attributes (float ABI, etc.). |
| class TargetHeaderLowering { |
| TargetHeaderLowering() = delete; |
| TargetHeaderLowering(const TargetHeaderLowering &) = delete; |
| TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete; |
| |
| public: |
| static std::unique_ptr<TargetHeaderLowering> |
| createLowering(GlobalContext *Ctx); |
| virtual ~TargetHeaderLowering(); |
| |
| virtual void lower() {} |
| |
| protected: |
| explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {} |
| GlobalContext *Ctx; |
| }; |
| |
| } // end of namespace Ice |
| |
| #endif // SUBZERO_SRC_ICETARGETLOWERING_H |