src/IceTargetLowering.h - platform/external/swiftshader - Gitiles

 //===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
 //
 //                        The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// This file declares the TargetLowering, LoweringContext,
 /// PostIncrLoweringContext, TargetDataLowering, and TargetHeaderLowering
 /// classes.  TargetLowering is an abstract class used to drive the
 /// translation/lowering process.  LoweringContext maintains a context for
 /// lowering each instruction, offering conveniences such as iterating over
 /// non-deleted instructions; PostIncrLoweringContext advances that context
 /// when it goes out of scope.  TargetDataLowering is an abstract class used
 /// to drive the lowering/emission of global initializers, external global
 /// declarations, and internal constant pools.  TargetHeaderLowering writes
 /// out the target-specific header attributes of an output file.
 ///
 //===----------------------------------------------------------------------===//

 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
 #define SUBZERO_SRC_ICETARGETLOWERING_H

 #include "IceDefs.h"
 #include "IceInst.h" // for the names of the Inst subtypes
 #include "IceOperand.h"
 #include "IceTypes.h"

 namespace Ice {

 /// LoweringContext makes it easy to iterate through non-deleted
 /// instructions in a node, and insert new (lowered) instructions at
 /// the current point.  Along with the instruction list container and
 /// associated iterators, it holds the current node, which is needed
 /// when inserting new instructions in order to track whether variables
 /// are used as single-block or multi-block.
 class LoweringContext {
   LoweringContext(const LoweringContext &) = delete;
   LoweringContext &operator=(const LoweringContext &) = delete;

 public:
   LoweringContext() = default;
   ~LoweringContext() = default;
   void init(CfgNode *Node);
   /// Returns the instruction at Next, or nullptr once Next has reached End.
   Inst *getNextInst() const {
     if (Next != End)
       return Next;
     return nullptr;
   }
   /// Moves Iter with advanceForward() and then returns the instruction it
   /// lands on, or nullptr if it landed on End.  Iter is updated in place.
   Inst *getNextInst(InstList::iterator &Iter) const {
     advanceForward(Iter);
     if (Iter != End)
       return Iter;
     return nullptr;
   }
   CfgNode *getNode() const { return Node; }
   /// True when there is no current instruction left (Cur has reached End).
   bool atEnd() const { return Cur == End; }
   InstList::iterator getCur() const { return Cur; }
   InstList::iterator getNext() const { return Next; }
   InstList::iterator getEnd() const { return End; }
   void insert(Inst *Inst);
   Inst *getLastInserted() const;
   /// Makes the instruction at Next the current one.
   void advanceCur() { setCur(Next); }
   /// Moves Next forward via advanceForward().
   void advanceNext() { advanceForward(Next); }
   void setCur(InstList::iterator C) { Cur = C; }
   void setNext(InstList::iterator N) { Next = N; }
   void rewind();
   /// Redirects the insertion point; this is the same iterator as Next.
   void setInsertPoint(const InstList::iterator &Position) {
     setNext(Position);
   }

 private:
   /// The node being lowered; it is what gets handed to Inst::updateVars().
   CfgNode *Node = nullptr;
   Inst *LastInserted = nullptr;
   /// The instruction currently under consideration.  Either a non-deleted
   /// instruction or End.
   InstList::iterator Cur;
   /// Serves two purposes at once: it marks the next valid instruction (if
   /// any) and it is where newly created instructions are inserted.  It is
   /// also updated on the caller's behalf when a lowering consumes more than
   /// one high-level instruction.  Either a non-deleted instruction after Cur
   /// or End.  TODO: Consider separating the notion of "next valid
   /// instruction" and "new instruction insertion point", to avoid confusion
   /// when previously-deleted instructions come between the two points.
   InstList::iterator Next;
   /// Copy of Insts.begin(), needed when iterators are moved backward.
   InstList::iterator Begin;
   /// Copy of Insts.end(), needed when Next has to be advanced.
   InstList::iterator End;

   void skipDeleted(InstList::iterator &I) const;
   void advanceForward(InstList::iterator &I) const;
 };

 /// A helper class to advance the LoweringContext at each loop iteration.
 class PostIncrLoweringContext {
   PostIncrLoweringContext() = delete;
   PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
   PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;

 public:
   explicit PostIncrLoweringContext(LoweringContext &Context)
       : Context(Context) {}
   ~PostIncrLoweringContext() {
     Context.advanceCur();
     Context.advanceNext();
   }

 private:
   LoweringContext &Context;
 };

 /// TargetLowering is the abstract, per-function lowering interface.  It holds
 /// the function being lowered (Func), the global context (Ctx) and a
 /// LoweringContext, and declares the pure-virtual hooks that a concrete
 /// target must implement.  Instances are not default-constructible or
 /// copyable; subclasses construct it through the protected constructor.
 class TargetLowering {
   TargetLowering() = delete;
   TargetLowering(const TargetLowering &) = delete;
   TargetLowering &operator=(const TargetLowering &) = delete;

 public:
   // TODO(jvoung): return a unique_ptr like the other factory functions.
   static TargetLowering *createLowering(TargetArch Target, Cfg *Func);
   static std::unique_ptr<Assembler> createAssembler(TargetArch Target,
                                                     Cfg *Func);
   /// Dispatches to the translateO*() hook that matches the optimization
   /// level reported by Ctx->getFlags().getOptLevel().  The switch has no
   /// default case, so a level other than the four listed does nothing.
   void translate() {
     switch (Ctx->getFlags().getOptLevel()) {
     case Opt_m1:
       translateOm1();
       break;
     case Opt_0:
       translateO0();
       break;
     case Opt_1:
       translateO1();
       break;
     case Opt_2:
       translateO2();
       break;
     }
   }
   /// The default translateO*() implementations below only record an error on
   /// Func; a target overrides the levels it supports.
   virtual void translateOm1() {
     Func->setError("Target doesn't specify Om1 lowering steps.");
   }
   virtual void translateO0() {
     Func->setError("Target doesn't specify O0 lowering steps.");
   }
   virtual void translateO1() {
     Func->setError("Target doesn't specify O1 lowering steps.");
   }
   virtual void translateO2() {
     Func->setError("Target doesn't specify O2 lowering steps.");
   }

   /// Tries to do address mode optimization on a single instruction.
   void doAddressOpt();
   /// Randomly insert NOPs.
   void doNopInsertion();
   /// Lowers a single non-Phi instruction.
   void lower();
   /// Inserts and lowers a single high-level instruction at a specific insertion
   /// point.
   void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
   /// Does preliminary lowering of the set of Phi instructions in the
   /// current node.  The main intention is to do what's needed to keep
   /// the unlowered Phi instructions consistent with the lowered
   /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
   /// target.  The default implementation does nothing.
   virtual void prelowerPhis() {}
   /// Tries to do branch optimization on a single instruction.  Returns
   /// true if some optimization was done.  The default implementation
   /// ignores both arguments and returns false.
   virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
     return false;
   }

   virtual SizeT getNumRegisters() const = 0;
   /// Returns a variable pre-colored to the specified physical
   /// register.  This is generally used to get very direct access to
   /// the register such as in the prolog or epilog or for marking
   /// scratch registers as killed by a call.  If a Type is not
   /// provided, a target-specific default type is used.
   virtual Variable *getPhysicalRegister(SizeT RegNum,
                                         Type Ty = IceType_void) = 0;
   /// Returns a printable name for the register.
   virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;

   /// Defaults to false; targets that use a frame pointer override this.
   virtual bool hasFramePointer() const { return false; }
   virtual SizeT getFrameOrStackReg() const = 0;
   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;

   bool hasComputedFrame() const { return HasComputedFrame; }
   /// Returns true if this function calls a function that has the
   /// "returns twice" attribute.
   bool callsReturnsTwice() const { return CallsReturnsTwice; }
   void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
   int32_t getStackAdjustment() const { return StackAdjustment; }
   /// Adds Offset to the running stack adjustment.
   void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
   void resetStackAdjustment() { StackAdjustment = 0; }
   /// Returns the current label number and then increments the counter.
   SizeT makeNextLabelNumber() { return NextLabelNumber++; }
   /// Returns the current jump table number and then increments the counter.
   SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
   LoweringContext &getContext() { return Context; }

   /// Bit flags naming register classes; values are combined into a
   /// RegSetMask.  RegSet_All is the bitwise complement of RegSet_None.
   enum RegSet {
     RegSet_None = 0,
     RegSet_CallerSave = 1 << 0,
     RegSet_CalleeSave = 1 << 1,
     RegSet_StackPointer = 1 << 2,
     RegSet_FramePointer = 1 << 3,
     RegSet_All = ~RegSet_None
   };
   typedef uint32_t RegSetMask;

   virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
                                               RegSetMask Exclude) const = 0;
   virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
   void regAlloc(RegAllocKind Kind);

   virtual void makeRandomRegisterPermutation(
       llvm::SmallVectorImpl<int32_t> &Permutation,
       const llvm::SmallBitVector &ExcludeRegisters) const = 0;

   /// Save/restore any mutable state for the situation where code
   /// emission needs multiple passes, such as sandboxing or relaxation.
   /// Subclasses may provide their own implementation, but should be
   /// sure to also call the parent class's methods.  At this level the
   /// only state saved and restored is StackAdjustment.
   virtual void snapshotEmitState() {
     SnapshotStackAdjustment = StackAdjustment;
   }
   virtual void rollbackEmitState() {
     StackAdjustment = SnapshotStackAdjustment;
   }

   /// Get the minimum number of clusters required for a jump table to be
   /// considered.
   virtual SizeT getMinJumpTableSize() const = 0;
   virtual void emitJumpTable(const Cfg *Func,
                              const InstJumpTable *JumpTable) const = 0;

   virtual void emitVariable(const Variable *Var) const = 0;

   void emitWithoutPrefix(const ConstantRelocatable *CR) const;
   void emit(const ConstantRelocatable *CR) const;
   virtual const char *getConstantPrefix() const = 0;

   /// Target-specific emission for each constant kind.
   virtual void emit(const ConstantUndef *C) const = 0;
   virtual void emit(const ConstantInteger32 *C) const = 0;
   virtual void emit(const ConstantInteger64 *C) const = 0;
   virtual void emit(const ConstantFloat *C) const = 0;
   virtual void emit(const ConstantDouble *C) const = 0;

   /// Performs target-specific argument lowering.
   virtual void lowerArguments() = 0;

   /// Per-node hook; the default implementation does nothing.
   virtual void initNodeForLowering(CfgNode *) {}
   virtual void addProlog(CfgNode *Node) = 0;
   virtual void addEpilog(CfgNode *Node) = 0;

   virtual ~TargetLowering() = default;

 protected:
   explicit TargetLowering(Cfg *Func);
   /// One lowering hook per high-level instruction kind; every target must
   /// implement all of the pure-virtual ones.
   virtual void lowerAlloca(const InstAlloca *Inst) = 0;
   virtual void lowerArithmetic(const InstArithmetic *Inst) = 0;
   virtual void lowerAssign(const InstAssign *Inst) = 0;
   virtual void lowerBr(const InstBr *Inst) = 0;
   virtual void lowerCall(const InstCall *Inst) = 0;
   virtual void lowerCast(const InstCast *Inst) = 0;
   virtual void lowerFcmp(const InstFcmp *Inst) = 0;
   virtual void lowerExtractElement(const InstExtractElement *Inst) = 0;
   virtual void lowerIcmp(const InstIcmp *Inst) = 0;
   virtual void lowerInsertElement(const InstInsertElement *Inst) = 0;
   virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0;
   virtual void lowerLoad(const InstLoad *Inst) = 0;
   virtual void lowerPhi(const InstPhi *Inst) = 0;
   virtual void lowerRet(const InstRet *Inst) = 0;
   virtual void lowerSelect(const InstSelect *Inst) = 0;
   virtual void lowerStore(const InstStore *Inst) = 0;
   virtual void lowerSwitch(const InstSwitch *Inst) = 0;
   virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;
   virtual void lowerOther(const Inst *Instr);

   /// Address-optimization hooks for loads and stores; both default to no-ops.
   virtual void doAddressOptLoad() {}
   virtual void doAddressOptStore() {}
   virtual void randomlyInsertNop(float Probability) = 0;
   /// This gives the target an opportunity to post-process the lowered
   /// expansion before returning.
   virtual void postLower() {}

   /// Find two-address non-SSA instructions and set the DestNonKillable flag
   /// to keep liveness analysis consistent.
   void inferTwoAddress();

   /// Make a pass over the Cfg to determine which variables need stack slots
   /// and place them in a sorted list (SortedSpilledVariables). Among those
   /// vars, classify the spill variables as local to the basic block vs
   /// global (multi-block) in order to compute the parameters GlobalsSize
   /// and SpillAreaSizeBytes (represents locals or general vars if the
   /// coalescing of locals is disallowed) along with alignments required
   /// for variables in each area. We rely on accurate VMetadata in order to
   /// classify a variable as global vs local (otherwise the variable is
   /// conservatively global). The in-args should be initialized to 0.
   ///
   /// This is only a pre-pass and the actual stack slot assignment is
   /// handled separately.
   ///
   /// There may be target-specific Variable types, which will be handled
   /// by TargetVarHook. If the TargetVarHook returns true, then the variable
   /// is skipped and not considered with the rest of the spilled variables.
   void getVarStackSlotParams(VarList &SortedSpilledVariables,
                              llvm::SmallBitVector &RegsUsed,
                              size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
                              uint32_t *SpillAreaAlignmentBytes,
                              uint32_t *LocalsSlotsAlignmentBytes,
                              std::function<bool(Variable *)> TargetVarHook);

   /// Calculate the amount of padding needed to align the local and global
   /// areas to the required alignment.  This assumes the globals/locals layout
   /// used by getVarStackSlotParams and assignVarStackSlots.
   void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                             uint32_t SpillAreaAlignmentBytes,
                             size_t GlobalsSize,
                             uint32_t LocalsSlotsAlignmentBytes,
                             uint32_t *SpillAreaPaddingBytes,
                             uint32_t *LocalsSlotsPaddingBytes);

   /// Make a pass through the SortedSpilledVariables and actually assign
   /// stack slots. SpillAreaPaddingBytes takes into account stack alignment
   /// padding. The SpillArea starts after that amount of padding.
   /// This matches the scheme in getVarStackSlotParams, where there may
   /// be a separate multi-block global var spill area and a local var
   /// spill area.
   void assignVarStackSlots(VarList &SortedSpilledVariables,
                            size_t SpillAreaPaddingBytes,
                            size_t SpillAreaSizeBytes,
                            size_t GlobalsAndSubsequentPaddingSize,
                            bool UsesFramePointer);

   /// Sort the variables in Source based on required alignment.
   /// The variables with the largest alignment need are placed in the front
   /// of the Dest list.
   void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;

   /// Make a call to an external helper function.
   InstCall *makeHelperCall(const IceString &Name, Variable *Dest,
                            SizeT MaxSrcs);

   /// Inserts an InstBundleLock, created for Func with the given option, at
   /// the current insertion point of Context.
   void
   _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
     Context.insert(InstBundleLock::create(Func, BundleOption));
   }
   /// Inserts the matching InstBundleUnlock into Context.
   void _bundle_unlock() { Context.insert(InstBundleUnlock::create(Func)); }
   /// Calls setDestNonKillable() on the instruction most recently inserted
   /// through Context.
   void _set_dest_nonkillable() {
     Context.getLastInserted()->setDestNonKillable();
   }

   bool shouldOptimizeMemIntrins();

   Cfg *Func;
   GlobalContext *Ctx;
   bool HasComputedFrame = false;
   bool CallsReturnsTwice = false;
   /// StackAdjustment keeps track of the current stack offset from its
   /// natural location, as arguments are pushed for a function call.
   int32_t StackAdjustment = 0;
   SizeT NextLabelNumber = 0;
   SizeT NextJumpTableNumber = 0;
   LoweringContext Context;

   // Runtime helper function names
   const static constexpr char *H_bitcast_16xi1_i16 = "__Sz_bitcast_16xi1_i16";
   const static constexpr char *H_bitcast_8xi1_i8 = "__Sz_bitcast_8xi1_i8";
   const static constexpr char *H_bitcast_i16_16xi1 = "__Sz_bitcast_i16_16xi1";
   const static constexpr char *H_bitcast_i8_8xi1 = "__Sz_bitcast_i8_8xi1";
   const static constexpr char *H_call_ctpop_i32 = "__popcountsi2";
   const static constexpr char *H_call_ctpop_i64 = "__popcountdi2";
   const static constexpr char *H_call_longjmp = "longjmp";
   const static constexpr char *H_call_memcpy = "memcpy";
   const static constexpr char *H_call_memmove = "memmove";
   const static constexpr char *H_call_memset = "memset";
   const static constexpr char *H_call_read_tp = "__nacl_read_tp";
   const static constexpr char *H_call_setjmp = "setjmp";
   const static constexpr char *H_fptosi_f32_i64 = "__Sz_fptosi_f32_i64";
   const static constexpr char *H_fptosi_f64_i64 = "__Sz_fptosi_f64_i64";
   const static constexpr char *H_fptoui_4xi32_f32 = "__Sz_fptoui_4xi32_f32";
   const static constexpr char *H_fptoui_f32_i32 = "__Sz_fptoui_f32_i32";
   const static constexpr char *H_fptoui_f32_i64 = "__Sz_fptoui_f32_i64";
   const static constexpr char *H_fptoui_f64_i32 = "__Sz_fptoui_f64_i32";
   const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
   const static constexpr char *H_frem_f32 = "fmodf";
   const static constexpr char *H_frem_f64 = "fmod";
   const static constexpr char *H_sdiv_i32 = "__divsi3";
   const static constexpr char *H_sdiv_i64 = "__divdi3";
   const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
   const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
   const static constexpr char *H_srem_i32 = "__modsi3";
   const static constexpr char *H_srem_i64 = "__moddi3";
   const static constexpr char *H_udiv_i32 = "__udivsi3";
   const static constexpr char *H_udiv_i64 = "__udivdi3";
   const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
   const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
   const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
   const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
   const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
   const static constexpr char *H_urem_i32 = "__umodsi3";
   const static constexpr char *H_urem_i64 = "__umoddi3";

 private:
   /// Value of StackAdjustment captured by snapshotEmitState() and put back
   /// by rollbackEmitState().
   int32_t SnapshotStackAdjustment = 0;
 };

 /// TargetDataLowering is used for "lowering" data including initializers
 /// for global variables, and the internal constant pools.  It is separated
 /// out from TargetLowering because it does not require a Cfg.
 class TargetDataLowering {
   TargetDataLowering() = delete;
   TargetDataLowering(const TargetDataLowering &) = delete;
   TargetDataLowering &operator=(const TargetDataLowering &) = delete;

 public:
   /// Factory for a data-lowering object bound to the given context.
   static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
   virtual ~TargetDataLowering();

   /// Hooks that every concrete data lowering has to provide.
   virtual void lowerGlobals(const VariableDeclarationList &Vars,
                             const IceString &SectionSuffix) = 0;
   virtual void lowerConstants() = 0;
   virtual void lowerJumpTables() = 0;

 protected:
   /// Only subclasses construct this class; the context pointer is stored
   /// as-is.
   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}

   void emitGlobal(const VariableDeclaration &Var,
                   const IceString &SectionSuffix);

   /// Assembler directive used when emitting 4-byte global relocations.  For
   /// now .long is assumed to be the right choice.  LLVM MIPS usually uses
   /// .4byte instead; perhaps there is some difference when the location is
   /// unaligned.
   static const char *getEmit32Directive() {
     const char *const Directive = ".long";
     return Directive;
   }

   GlobalContext *Ctx;
 };

 /// TargetHeaderLowering is used to "lower" the header of an output file.
 /// It writes out the target-specific header attributes. E.g., for ARM
 /// this writes out the build attributes (float ABI, etc.).
 class TargetHeaderLowering {
   // Not default-constructible and not copyable.
   TargetHeaderLowering() = delete;
   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
   TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;

 public:
   /// Factory returning an owning pointer to a header-lowering object for the
   /// given context.
   static std::unique_ptr<TargetHeaderLowering>
   createLowering(GlobalContext *Ctx);
   virtual ~TargetHeaderLowering();

   /// Hook for writing the header; the default implementation does nothing.
   virtual void lower() {}

 protected:
   /// Stores the context pointer; reachable only from subclasses.
   explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
   GlobalContext *Ctx;
 };

 } // end of namespace Ice

 #endif // SUBZERO_SRC_ICETARGETLOWERING_H
	//===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
	//
	// The Subzero Code Generator
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	///
	/// \file
	/// This file declares the TargetLowering, LoweringContext, and
	/// TargetDataLowering classes. TargetLowering is an abstract class
	/// used to drive the translation/lowering process. LoweringContext
	/// maintains a context for lowering each instruction, offering
	/// conveniences such as iterating over non-deleted instructions.
	/// TargetDataLowering is an abstract class used to drive the
	/// lowering/emission of global initializers, external global
	/// declarations, and internal constant pools.
	///
	//===----------------------------------------------------------------------===//

	#ifndef SUBZERO_SRC_ICETARGETLOWERING_H
	#define SUBZERO_SRC_ICETARGETLOWERING_H

	#include "IceDefs.h"
	#include "IceInst.h" // for the names of the Inst subtypes
	#include "IceOperand.h"
	#include "IceTypes.h"

	namespace Ice {

	/// LoweringContext makes it easy to iterate through non-deleted
	/// instructions in a node, and insert new (lowered) instructions at
	/// the current point. Along with the instruction list container and
	/// associated iterators, it holds the current node, which is needed
	/// when inserting new instructions in order to track whether variables
	/// are used as single-block or multi-block.
	class LoweringContext {
	  LoweringContext(const LoweringContext &) = delete;
	  LoweringContext &operator=(const LoweringContext &) = delete;

	public:
	  LoweringContext() = default;
	  ~LoweringContext() = default;
	  void init(CfgNode *Node);
	  /// Returns the instruction at Next, or nullptr once Next has reached End.
	  Inst *getNextInst() const {
	    if (Next != End)
	      return Next;
	    return nullptr;
	  }
	  /// Moves Iter with advanceForward() and then returns the instruction it
	  /// lands on, or nullptr if it landed on End.  Iter is updated in place.
	  Inst *getNextInst(InstList::iterator &Iter) const {
	    advanceForward(Iter);
	    if (Iter != End)
	      return Iter;
	    return nullptr;
	  }
	  CfgNode *getNode() const { return Node; }
	  /// True when there is no current instruction left (Cur has reached End).
	  bool atEnd() const { return Cur == End; }
	  InstList::iterator getCur() const { return Cur; }
	  InstList::iterator getNext() const { return Next; }
	  InstList::iterator getEnd() const { return End; }
	  void insert(Inst *Inst);
	  Inst *getLastInserted() const;
	  /// Makes the instruction at Next the current one.
	  void advanceCur() { setCur(Next); }
	  /// Moves Next forward via advanceForward().
	  void advanceNext() { advanceForward(Next); }
	  void setCur(InstList::iterator C) { Cur = C; }
	  void setNext(InstList::iterator N) { Next = N; }
	  void rewind();
	  /// Redirects the insertion point; this is the same iterator as Next.
	  void setInsertPoint(const InstList::iterator &Position) {
	    setNext(Position);
	  }

	private:
	  /// The node being lowered; it is what gets handed to Inst::updateVars().
	  CfgNode *Node = nullptr;
	  Inst *LastInserted = nullptr;
	  /// The instruction currently under consideration.  Either a non-deleted
	  /// instruction or End.
	  InstList::iterator Cur;
	  /// Serves two purposes at once: it marks the next valid instruction (if
	  /// any) and it is where newly created instructions are inserted.  It is
	  /// also updated on the caller's behalf when a lowering consumes more than
	  /// one high-level instruction.  Either a non-deleted instruction after Cur
	  /// or End.  TODO: Consider separating the notion of "next valid
	  /// instruction" and "new instruction insertion point", to avoid confusion
	  /// when previously-deleted instructions come between the two points.
	  InstList::iterator Next;
	  /// Copy of Insts.begin(), needed when iterators are moved backward.
	  InstList::iterator Begin;
	  /// Copy of Insts.end(), needed when Next has to be advanced.
	  InstList::iterator End;

	  void skipDeleted(InstList::iterator &I) const;
	  void advanceForward(InstList::iterator &I) const;
	};

	/// A helper class to advance the LoweringContext at each loop iteration.
	class PostIncrLoweringContext {
	PostIncrLoweringContext() = delete;
	PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
	PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;

	public:
	explicit PostIncrLoweringContext(LoweringContext &Context)
	: Context(Context) {}
	~PostIncrLoweringContext() {
	Context.advanceCur();
	Context.advanceNext();
	}

	private:
	LoweringContext &Context;
	};

	class TargetLowering {
	TargetLowering() = delete;
	TargetLowering(const TargetLowering &) = delete;
	TargetLowering &operator=(const TargetLowering &) = delete;

	public:
	// TODO(jvoung): return a unique_ptr like the other factory functions.
	static TargetLowering createLowering(TargetArch Target, Cfg Func);
	static std::unique_ptr<Assembler> createAssembler(TargetArch Target,
	Cfg *Func);
	void translate() {
	switch (Ctx->getFlags().getOptLevel()) {
	case Opt_m1:
	translateOm1();
	break;
	case Opt_0:
	translateO0();
	break;
	case Opt_1:
	translateO1();
	break;
	case Opt_2:
	translateO2();
	break;
	}
	}
	virtual void translateOm1() {
	Func->setError("Target doesn't specify Om1 lowering steps.");
	}
	virtual void translateO0() {
	Func->setError("Target doesn't specify O0 lowering steps.");
	}
	virtual void translateO1() {
	Func->setError("Target doesn't specify O1 lowering steps.");
	}
	virtual void translateO2() {
	Func->setError("Target doesn't specify O2 lowering steps.");
	}

	/// Tries to do address mode optimization on a single instruction.
	void doAddressOpt();
	/// Randomly insert NOPs.
	void doNopInsertion();
	/// Lowers a single non-Phi instruction.
	void lower();
	/// Inserts and lowers a single high-level instruction at a specific insertion
	/// point.
	void lowerInst(CfgNode Node, InstList::iterator Next, InstHighLevel Instr);
	/// Does preliminary lowering of the set of Phi instructions in the
	/// current node. The main intention is to do what's needed to keep
	/// the unlowered Phi instructions consistent with the lowered
	/// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
	/// target.
	virtual void prelowerPhis() {}
	/// Tries to do branch optimization on a single instruction. Returns
	/// true if some optimization was done.
	virtual bool doBranchOpt(Inst * /I/, const CfgNode * /NextNode/) {
	return false;
	}

	virtual SizeT getNumRegisters() const = 0;
	/// Returns a variable pre-colored to the specified physical
	/// register. This is generally used to get very direct access to
	/// the register such as in the prolog or epilog or for marking
	/// scratch registers as killed by a call. If a Type is not
	/// provided, a target-specific default type is used.
	virtual Variable *getPhysicalRegister(SizeT RegNum,
	Type Ty = IceType_void) = 0;
	/// Returns a printable name for the register.
	virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;

	virtual bool hasFramePointer() const { return false; }
	virtual SizeT getFrameOrStackReg() const = 0;
	virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;

	bool hasComputedFrame() const { return HasComputedFrame; }
	/// Returns true if this function calls a function that has the
	/// "returns twice" attribute.
	bool callsReturnsTwice() const { return CallsReturnsTwice; }
	void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
	int32_t getStackAdjustment() const { return StackAdjustment; }
	void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
	void resetStackAdjustment() { StackAdjustment = 0; }
	SizeT makeNextLabelNumber() { return NextLabelNumber++; }
	SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
	LoweringContext &getContext() { return Context; }

	enum RegSet {
	RegSet_None = 0,
	RegSet_CallerSave = 1 << 0,
	RegSet_CalleeSave = 1 << 1,
	RegSet_StackPointer = 1 << 2,
	RegSet_FramePointer = 1 << 3,
	RegSet_All = ~RegSet_None
	};
	typedef uint32_t RegSetMask;

	virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
	RegSetMask Exclude) const = 0;
	virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
	void regAlloc(RegAllocKind Kind);

	virtual void makeRandomRegisterPermutation(
	llvm::SmallVectorImpl<int32_t> &Permutation,
	const llvm::SmallBitVector &ExcludeRegisters) const = 0;

	/// Save/restore any mutable state for the situation where code
	/// emission needs multiple passes, such as sandboxing or relaxation.
	/// Subclasses may provide their own implementation, but should be
	/// sure to also call the parent class's methods.
	virtual void snapshotEmitState() {
	SnapshotStackAdjustment = StackAdjustment;
	}
	virtual void rollbackEmitState() {
	StackAdjustment = SnapshotStackAdjustment;
	}

	/// Get the minimum number of clusters required for a jump table to be
	/// considered.
	virtual SizeT getMinJumpTableSize() const = 0;
	virtual void emitJumpTable(const Cfg *Func,
	const InstJumpTable *JumpTable) const = 0;

	virtual void emitVariable(const Variable *Var) const = 0;

	void emitWithoutPrefix(const ConstantRelocatable *CR) const;
	void emit(const ConstantRelocatable *CR) const;
	virtual const char *getConstantPrefix() const = 0;

	virtual void emit(const ConstantUndef *C) const = 0;
	virtual void emit(const ConstantInteger32 *C) const = 0;
	virtual void emit(const ConstantInteger64 *C) const = 0;
	virtual void emit(const ConstantFloat *C) const = 0;
	virtual void emit(const ConstantDouble *C) const = 0;

	/// Performs target-specific argument lowering.
	virtual void lowerArguments() = 0;

	virtual void initNodeForLowering(CfgNode *) {}
	virtual void addProlog(CfgNode *Node) = 0;
	virtual void addEpilog(CfgNode *Node) = 0;

	virtual ~TargetLowering() = default;

	protected:
	explicit TargetLowering(Cfg *Func);
	virtual void lowerAlloca(const InstAlloca *Inst) = 0;
	virtual void lowerArithmetic(const InstArithmetic *Inst) = 0;
	virtual void lowerAssign(const InstAssign *Inst) = 0;
	virtual void lowerBr(const InstBr *Inst) = 0;
	virtual void lowerCall(const InstCall *Inst) = 0;
	virtual void lowerCast(const InstCast *Inst) = 0;
	virtual void lowerFcmp(const InstFcmp *Inst) = 0;
	virtual void lowerExtractElement(const InstExtractElement *Inst) = 0;
	virtual void lowerIcmp(const InstIcmp *Inst) = 0;
	virtual void lowerInsertElement(const InstInsertElement *Inst) = 0;
	virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0;
	virtual void lowerLoad(const InstLoad *Inst) = 0;
	virtual void lowerPhi(const InstPhi *Inst) = 0;
	virtual void lowerRet(const InstRet *Inst) = 0;
	virtual void lowerSelect(const InstSelect *Inst) = 0;
	virtual void lowerStore(const InstStore *Inst) = 0;
	virtual void lowerSwitch(const InstSwitch *Inst) = 0;
	virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;
	virtual void lowerOther(const Inst *Instr);

	virtual void doAddressOptLoad() {}
	virtual void doAddressOptStore() {}
	virtual void randomlyInsertNop(float Probability) = 0;
	/// This gives the target an opportunity to post-process the lowered
	/// expansion before returning.
	virtual void postLower() {}

	/// Find two-address non-SSA instructions and set the DestNonKillable flag
	/// to keep liveness analysis consistent.
	void inferTwoAddress();

	/// Make a pass over the Cfg to determine which variables need stack slots
	/// and place them in a sorted list (SortedSpilledVariables). Among those,
	/// vars, classify the spill variables as local to the basic block vs
	/// global (multi-block) in order to compute the parameters GlobalsSize
	/// and SpillAreaSizeBytes (represents locals or general vars if the
	/// coalescing of locals is disallowed) along with alignments required
	/// for variables in each area. We rely on accurate VMetadata in order to
	/// classify a variable as global vs local (otherwise the variable is
	/// conservatively global). The in-args should be initialized to 0.
	///
	/// This is only a pre-pass and the actual stack slot assignment is
	/// handled separately.
	///
	/// There may be target-specific Variable types, which will be handled
	/// by TargetVarHook. If the TargetVarHook returns true, then the variable
	/// is skipped and not considered with the rest of the spilled variables.
	void getVarStackSlotParams(VarList &SortedSpilledVariables,
	llvm::SmallBitVector &RegsUsed,
	size_t GlobalsSize, size_t SpillAreaSizeBytes,
	uint32_t *SpillAreaAlignmentBytes,
	uint32_t *LocalsSlotsAlignmentBytes,
	std::function<bool(Variable *)> TargetVarHook);

	/// Calculate the amount of padding needed to align the local and global
	/// areas to the required alignment. This assumes the globals/locals layout
	/// used by getVarStackSlotParams and assignVarStackSlots.
	///
	/// SpillAreaStartOffset, SpillAreaAlignmentBytes, GlobalsSize and
	/// LocalsSlotsAlignmentBytes are passed by value. The two padding amounts
	/// are passed by pointer through SpillAreaPaddingBytes and
	/// LocalsSlotsPaddingBytes.
	void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
	uint32_t SpillAreaAlignmentBytes,
	size_t GlobalsSize,
	uint32_t LocalsSlotsAlignmentBytes,
	uint32_t *SpillAreaPaddingBytes,
	uint32_t *LocalsSlotsPaddingBytes);

	/// Make a pass through the SortedSpilledVariables and actually assign
	/// stack slots. SpillAreaPaddingBytes takes into account stack alignment
	/// padding. The SpillArea starts after that amount of padding.
	/// This matches the scheme in getVarStackSlotParams, where there may
	/// be a separate multi-block global var spill area and a local var
	/// spill area.
	///
	/// All size arguments are passed by value; SortedSpilledVariables is
	/// taken by non-const reference.
	/// NOTE(review): UsesFramePointer presumably selects whether offsets are
	/// computed relative to the frame pointer or the stack pointer -- confirm
	/// against the definition.
	void assignVarStackSlots(VarList &SortedSpilledVariables,
	size_t SpillAreaPaddingBytes,
	size_t SpillAreaSizeBytes,
	size_t GlobalsAndSubsequentPaddingSize,
	bool UsesFramePointer);

	/// Sort the variables in Source based on required alignment.
	/// The variables with the largest alignment requirement are placed at the
	/// front of the Dest list. Source is taken by const reference, and the
	/// member function itself is const.
	void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;

	/// Make a call to an external helper function.
	///
	/// Name is the helper's symbol name (see the H_* runtime helper function
	/// names declared in this class). Instructions and variables are handled
	/// by pointer throughout this interface, so the call instruction is
	/// returned by pointer and Dest is taken by pointer.
	/// NOTE(review): Dest presumably receives the helper's result and MaxSrcs
	/// presumably bounds the number of call arguments -- confirm against the
	/// definition.
	InstCall *makeHelperCall(const IceString &Name, Variable *Dest,
			SizeT MaxSrcs);

	/// Creates an InstBundleLock for Func with the requested option
	/// (InstBundleLock::Opt_None when omitted) and inserts it through the
	/// lowering context.
	void _bundle_lock(
			InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
		auto *Lock = InstBundleLock::create(Func, BundleOption);
		Context.insert(Lock);
	}
	/// Creates an InstBundleUnlock for Func and inserts it through the
	/// lowering context.
	void _bundle_unlock() {
		auto *Unlock = InstBundleUnlock::create(Func);
		Context.insert(Unlock);
	}
	/// Calls setDestNonKillable() on the instruction that the lowering
	/// context reports as last inserted.
	void _set_dest_nonkillable() {
		auto *LastInserted = Context.getLastInserted();
		LastInserted->setDestNonKillable();
	}

	/// Predicate taking no arguments; the definition is not in this part of
	/// the header.
	/// NOTE(review): presumably reports whether memory intrinsics (cf. the
	/// memcpy/memmove/memset helper names declared in this class) should be
	/// optimized instead of lowered to helper calls -- confirm against the
	/// definition.
	bool shouldOptimizeMemIntrins();

	/// Cfg handed to the instruction factories, e.g. InstBundleLock::create().
	Cfg *Func;
	/// Pointer to the GlobalContext. It has no in-class initializer here.
	GlobalContext *Ctx;
	/// Starts out false. NOTE(review): presumably set once the stack frame
	/// layout has been computed -- confirm against the definition.
	bool HasComputedFrame = false;
	/// Starts out false. NOTE(review): presumably records that the function
	/// calls something that may return twice (e.g. setjmp) -- confirm.
	bool CallsReturnsTwice = false;
	/// StackAdjustment keeps track of the current stack offset from its
	/// natural location, as arguments are pushed for a function call.
	int32_t StackAdjustment = 0;
	/// Counters starting at 0. NOTE(review): presumably the next unused
	/// label number and jump table number -- confirm against their users.
	SizeT NextLabelNumber = 0;
	SizeT NextJumpTableNumber = 0;
	/// Lowering context through which the _bundle_lock(), _bundle_unlock()
	/// and _set_dest_nonkillable() helpers insert and query instructions.
	LoweringContext Context;

	// Symbol names of the runtime helper functions. Each constant is a
	// compile-time pointer to a string literal.
	static constexpr const char *H_bitcast_16xi1_i16 = "__Sz_bitcast_16xi1_i16";
	static constexpr const char *H_bitcast_8xi1_i8 = "__Sz_bitcast_8xi1_i8";
	static constexpr const char *H_bitcast_i16_16xi1 = "__Sz_bitcast_i16_16xi1";
	static constexpr const char *H_bitcast_i8_8xi1 = "__Sz_bitcast_i8_8xi1";
	static constexpr const char *H_call_ctpop_i32 = "__popcountsi2";
	static constexpr const char *H_call_ctpop_i64 = "__popcountdi2";
	static constexpr const char *H_call_longjmp = "longjmp";
	static constexpr const char *H_call_memcpy = "memcpy";
	static constexpr const char *H_call_memmove = "memmove";
	static constexpr const char *H_call_memset = "memset";
	static constexpr const char *H_call_read_tp = "__nacl_read_tp";
	static constexpr const char *H_call_setjmp = "setjmp";
	static constexpr const char *H_fptosi_f32_i64 = "__Sz_fptosi_f32_i64";
	static constexpr const char *H_fptosi_f64_i64 = "__Sz_fptosi_f64_i64";
	static constexpr const char *H_fptoui_4xi32_f32 = "__Sz_fptoui_4xi32_f32";
	static constexpr const char *H_fptoui_f32_i32 = "__Sz_fptoui_f32_i32";
	static constexpr const char *H_fptoui_f32_i64 = "__Sz_fptoui_f32_i64";
	static constexpr const char *H_fptoui_f64_i32 = "__Sz_fptoui_f64_i32";
	static constexpr const char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
	static constexpr const char *H_frem_f32 = "fmodf";
	static constexpr const char *H_frem_f64 = "fmod";
	static constexpr const char *H_sdiv_i32 = "__divsi3";
	static constexpr const char *H_sdiv_i64 = "__divdi3";
	static constexpr const char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
	static constexpr const char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
	static constexpr const char *H_srem_i32 = "__modsi3";
	static constexpr const char *H_srem_i64 = "__moddi3";
	static constexpr const char *H_udiv_i32 = "__udivsi3";
	static constexpr const char *H_udiv_i64 = "__udivdi3";
	static constexpr const char *H_uitofp_4xi32_4xf32 =
			"__Sz_uitofp_4xi32_4xf32";
	static constexpr const char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
	static constexpr const char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
	static constexpr const char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
	static constexpr const char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
	static constexpr const char *H_urem_i32 = "__umodsi3";
	static constexpr const char *H_urem_i64 = "__umoddi3";

	private:
	/// Private int32_t initialized to 0, the same type and initial value as
	/// StackAdjustment.
	/// NOTE(review): presumably holds a saved copy of StackAdjustment --
	/// confirm against the code that reads and writes it, which is not
	/// visible in this part of the header.
	int32_t SnapshotStackAdjustment = 0;
	};

	/// TargetDataLowering handles the "lowering" of data: initializers for
	/// global variables and the internal constant pools. It is kept apart
	/// from TargetLowering because it does not need a Cfg.
	class TargetDataLowering {
	private:
		// Neither default-constructible nor copyable.
		TargetDataLowering() = delete;
		TargetDataLowering(const TargetDataLowering &) = delete;
		TargetDataLowering &operator=(const TargetDataLowering &) = delete;

	public:
		/// Factory: returns an owning pointer to a TargetDataLowering created
		/// for the given global context.
		static std::unique_ptr<TargetDataLowering>
		createLowering(GlobalContext *Ctx);
		virtual ~TargetDataLowering();

		/// Pure virtual entry points that each concrete data lowering must
		/// provide.
		virtual void lowerGlobals(const VariableDeclarationList &Vars,
				const IceString &SectionSuffix) = 0;
		virtual void lowerConstants() = 0;
		virtual void lowerJumpTables() = 0;

	protected:
		/// Only subclasses may construct; stores the global context pointer.
		explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}

		void emitGlobal(const VariableDeclaration &Var,
				const IceString &SectionSuffix);

		/// For now, assume .long is the right directive for emitting 4-byte
		/// global relocations. However, LLVM MIPS usually uses .4byte instead.
		/// Perhaps there is some difference when the location is unaligned.
		static const char *getEmit32Directive() {
			return ".long";
		}

		GlobalContext *Ctx;
	};

	/// TargetHeaderLowering "lowers" the header of an output file by writing
	/// out the target-specific header attributes. E.g., for ARM this writes
	/// out the build attributes (float ABI, etc.).
	class TargetHeaderLowering {
	private:
		// Neither default-constructible nor copyable.
		TargetHeaderLowering() = delete;
		TargetHeaderLowering(const TargetHeaderLowering &) = delete;
		TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;

	public:
		/// Factory: returns an owning pointer to a TargetHeaderLowering
		/// created for the given global context.
		static std::unique_ptr<TargetHeaderLowering>
		createLowering(GlobalContext *Ctx);
		virtual ~TargetHeaderLowering();

		/// Header lowering hook; the default implementation does nothing.
		virtual void lower() {}

	protected:
		/// Only subclasses may construct; stores the global context pointer.
		explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}

		GlobalContext *Ctx;
	};

	} // end of namespace Ice

	#endif // SUBZERO_SRC_ICETARGETLOWERING_H