| /* |
| * Copyright (C) 2016 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "register_allocator_graph_color.h" |
| |
| #include "code_generator.h" |
| #include "linear_order.h" |
| #include "register_allocation_resolver.h" |
| #include "ssa_liveness_analysis.h" |
| #include "thread-current-inl.h" |
| |
| namespace art { |
| |
| // Highest number of registers that we support for any platform. This can be used for std::bitset, |
| // for example, which needs to know its size at compile time. |
| static constexpr size_t kMaxNumRegs = 32; |
| |
| // The maximum number of graph coloring attempts before triggering a DCHECK. |
| // This is meant to catch changes to the graph coloring algorithm that undermine its forward |
| // progress guarantees. Forward progress for the algorithm means splitting live intervals on |
| // every graph coloring attempt so that eventually the interference graph will be sparse enough |
| // to color. The main threat to forward progress is trying to split short intervals which cannot be |
| // split further; this could cause infinite looping because the interference graph would never |
| // change. This is avoided by prioritizing short intervals before long ones, so that long |
| // intervals are split when coloring fails. |
| static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; |
| |
| // We always want to avoid spilling inside loops. |
| static constexpr size_t kLoopSpillWeightMultiplier = 10; |
| |
| // If we avoid moves in single jump blocks, we can avoid jumps to jumps. |
| static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; |
| |
| // We avoid moves in blocks that dominate the exit block, since these blocks will |
| // be executed on every path through the method. |
| static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; |
| |
| enum class CoalesceKind { |
| kAdjacentSibling, // Prevents moves at interval split points. |
| kFixedOutputSibling, // Prevents moves from a fixed output location. |
| kFixedInput, // Prevents moves into a fixed input location. |
| kNonlinearControlFlow, // Prevents moves between blocks. |
| kPhi, // Prevents phi resolution moves. |
| kFirstInput, // Prevents a single input move. |
| kAnyInput, // May lead to better instruction selection / smaller encodings. |
| }; |
| |
| std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { |
| return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); |
| } |
| |
| static size_t LoopDepthAt(HBasicBlock* block) { |
| HLoopInformation* loop_info = block->GetLoopInformation(); |
| size_t depth = 0; |
| while (loop_info != nullptr) { |
| ++depth; |
| loop_info = loop_info->GetPreHeader()->GetLoopInformation(); |
| } |
| return depth; |
| } |
| |
| // Return the runtime cost of inserting a move instruction at the specified location. |
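| // For example, with the multipliers above, a move in a block at loop depth 2 that also dominates |
| // the exit block (and is not a single jump block) costs 1 * 2 * 10 * 10 = 200. |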
| static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { |
| HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); |
| DCHECK(block != nullptr); |
| size_t cost = 1; |
| if (block->IsSingleJump()) { |
| cost *= kSingleJumpBlockWeightMultiplier; |
| } |
| if (block->Dominates(block->GetGraph()->GetExitBlock())) { |
| cost *= kDominatesExitBlockWeightMultiplier; |
| } |
| for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { |
| cost *= kLoopSpillWeightMultiplier; |
| } |
| return cost; |
| } |
| |
| // In general, we estimate coalesce priority by whether it will definitely avoid a move, |
| // and by how likely it is to create an interference graph that's harder to color. |
| static size_t ComputeCoalescePriority(CoalesceKind kind, |
| size_t position, |
| const SsaLivenessAnalysis& liveness) { |
| if (kind == CoalesceKind::kAnyInput) { |
| // This type of coalescing can affect instruction selection, but not moves, so we |
| // give it the lowest priority. |
| return 0; |
| } else { |
| return CostForMoveAt(position, liveness); |
| } |
| } |
| |
| enum class CoalesceStage { |
| kWorklist, // Currently in the iterative coalescing worklist. |
| kActive, // Not in a worklist, but could be considered again during iterative coalescing. |
| kInactive, // No longer considered until last-chance coalescing. |
| kDefunct, // Either the two nodes interfere, or have already been coalesced. |
| }; |
| |
| std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { |
| return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); |
| } |
| |
| // Represents a coalesce opportunity between two nodes. |
| struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { |
| CoalesceOpportunity(InterferenceNode* a, |
| InterferenceNode* b, |
| CoalesceKind kind, |
| size_t position, |
| const SsaLivenessAnalysis& liveness) |
| : node_a(a), |
| node_b(b), |
| stage(CoalesceStage::kWorklist), |
| priority(ComputeCoalescePriority(kind, position, liveness)) {} |
| |
| // Compare two coalesce opportunities based on their priority. |
| // Return true if lhs has a lower priority than that of rhs. |
| static bool CmpPriority(const CoalesceOpportunity* lhs, |
| const CoalesceOpportunity* rhs) { |
| return lhs->priority < rhs->priority; |
| } |
| |
| InterferenceNode* const node_a; |
| InterferenceNode* const node_b; |
| |
| // The current stage of this coalesce opportunity, indicating whether it is in a worklist, |
| // and whether it should still be considered. |
| CoalesceStage stage; |
| |
| // The priority of this coalesce opportunity, based on heuristics. |
| const size_t priority; |
| }; |
| |
| enum class NodeStage { |
| kInitial, // Uninitialized. |
| kPrecolored, // Marks fixed nodes. |
| kSafepoint, // Marks safepoint nodes. |
| kPrunable, // Marks uncolored nodes in the interference graph. |
| kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. |
| kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. |
| kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. |
| kPruned // Marks nodes already pruned from the interference graph. |
| }; |
| |
| std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { |
| return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); |
| } |
| |
| // Returns the estimated cost of spilling a particular live interval. |
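| // For example, an interval of length 20 whose definition does not require a register and whose |
| // two register uses fall at loop depth 1, in blocks that are neither single jumps nor dominators |
| // of the exit block, gets a weight of (10 + 10) / 20 = 1.0. |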
| static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { |
| if (interval->HasRegister()) { |
| // Intervals with a fixed register cannot be spilled. |
| return std::numeric_limits<float>::min(); |
| } |
| |
| size_t length = interval->GetLength(); |
| if (length == 1) { |
| // Tiny intervals should have maximum priority, since they cannot be split any further. |
| return std::numeric_limits<float>::max(); |
| } |
| |
| size_t use_weight = 0; |
| if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { |
| // Cost for spilling at a register definition point. |
| use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); |
| } |
| |
| // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. |
| // [interval->GetStart() + 1, interval->GetEnd() + 1) |
| auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), |
| interval->GetUses().end(), |
| interval->GetStart() + 1u, |
| interval->GetEnd() + 1u); |
| for (const UsePosition& use : matching_use_range) { |
| if (use.GetUser() != nullptr && use.RequiresRegister()) { |
| // Cost for spilling at a register use point. |
| use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); |
| } |
| } |
| |
| // We divide by the length of the interval because we want to prioritize |
| // short intervals; we do not benefit much if we split them further. |
| return static_cast<float>(use_weight) / static_cast<float>(length); |
| } |
| |
| // Interference nodes make up the interference graph, which is the primary data structure in |
| // graph coloring register allocation. Each node represents a single live interval, and contains |
| // a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, |
| // pre-colored nodes never contain outgoing edges (only incoming ones). |
| // |
| // As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, |
| // but outgoing edges remain in order to later color the node based on the colors of its neighbors. |
| // |
| // Note that a pair interval is represented by a single node in the interference graph, which |
| // essentially requires two colors. One consequence of this is that the degree of a node is not |
| // necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum |
| // number of colors with which a node could interfere. We model this by giving edges different |
| weights (1 or 2) to control how much each edge increases the degree of its adjacent nodes. |
| // For example, the edge between two single nodes will have weight 1. On the other hand, |
| // the edge between a single node and a pair node will have weight 2. This is because the pair |
| // node could block up to two colors for the single node, and because the single node could |
| // block an entire two-register aligned slot for the pair node. |
| // The degree is defined this way because we use it to decide whether a node is guaranteed a color, |
| // and thus whether it is safe to prune it from the interference graph early on. |
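| // For example, a single-register node adjacent to one pair node and one other single node has |
| // degree 2 + 1 = 3, even though it only has two neighbors. |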
| class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { |
| public: |
| InterferenceNode(LiveInterval* interval, |
| const SsaLivenessAnalysis& liveness) |
| : stage(NodeStage::kInitial), |
| interval_(interval), |
| adjacent_nodes_(nullptr), |
| coalesce_opportunities_(nullptr), |
| out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), |
| alias_(this), |
| spill_weight_(ComputeSpillWeight(interval, liveness)), |
| requires_color_(interval->RequiresRegister()), |
| needs_spill_slot_(false) { |
| DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; |
| } |
| |
| void AddInterference(InterferenceNode* other, |
| bool guaranteed_not_interfering_yet, |
| ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { |
| DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; |
| DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; |
| DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; |
| DCHECK_NE(stage, NodeStage::kPruned); |
| DCHECK_NE(other->stage, NodeStage::kPruned); |
| if (adjacent_nodes_ == nullptr) { |
| ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); |
| storage->emplace_back(adapter); |
| adjacent_nodes_ = &storage->back(); |
| } |
| if (guaranteed_not_interfering_yet) { |
| DCHECK(!ContainsElement(GetAdjacentNodes(), other)); |
| adjacent_nodes_->push_back(other); |
| out_degree_ += EdgeWeightWith(other); |
| } else { |
| if (!ContainsElement(GetAdjacentNodes(), other)) { |
| adjacent_nodes_->push_back(other); |
| out_degree_ += EdgeWeightWith(other); |
| } |
| } |
| } |
| |
| void RemoveInterference(InterferenceNode* other) { |
| DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; |
| DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; |
| if (adjacent_nodes_ != nullptr) { |
| auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); |
| if (it != adjacent_nodes_->end()) { |
| adjacent_nodes_->erase(it); |
| out_degree_ -= EdgeWeightWith(other); |
| } |
| } |
| } |
| |
| bool ContainsInterference(InterferenceNode* other) const { |
| DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; |
| DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; |
| return ContainsElement(GetAdjacentNodes(), other); |
| } |
| |
| LiveInterval* GetInterval() const { |
| return interval_; |
| } |
| |
| ArrayRef<InterferenceNode*> GetAdjacentNodes() const { |
| return adjacent_nodes_ != nullptr |
| ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) |
| : ArrayRef<InterferenceNode*>(); |
| } |
| |
| size_t GetOutDegree() const { |
| // Pre-colored nodes have infinite degree. |
| DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max()); |
| return out_degree_; |
| } |
| |
| void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, |
| ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { |
| if (coalesce_opportunities_ == nullptr) { |
| ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); |
| storage->emplace_back(adapter); |
| coalesce_opportunities_ = &storage->back(); |
| } |
| coalesce_opportunities_->push_back(opportunity); |
| } |
| |
| void ClearCoalesceOpportunities() { |
| coalesce_opportunities_ = nullptr; |
| } |
| |
| bool IsMoveRelated() const { |
| for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { |
| if (opportunity->stage == CoalesceStage::kWorklist || |
| opportunity->stage == CoalesceStage::kActive) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // Return whether this node already has a color. |
| // Used to find fixed nodes in the interference graph before coloring. |
| bool IsPrecolored() const { |
| return interval_->HasRegister(); |
| } |
| |
| bool IsPair() const { |
| return interval_->HasHighInterval(); |
| } |
| |
| void SetAlias(InterferenceNode* rep) { |
| DCHECK_NE(rep->stage, NodeStage::kPruned); |
| DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; |
| alias_ = rep; |
| } |
| |
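| // Return the representative node for this one (itself if it has not been coalesced), flattening |
| // chains of alias pointers along the way (e.g., after coalescing a into b and then b into c, |
| // a's alias is updated to point directly at c). |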
| InterferenceNode* GetAlias() { |
| if (alias_ != this) { |
| // Recurse in order to flatten tree of alias pointers. |
| alias_ = alias_->GetAlias(); |
| } |
| return alias_; |
| } |
| |
| ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { |
| return coalesce_opportunities_ != nullptr |
| ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) |
| : ArrayRef<CoalesceOpportunity*>(); |
| } |
| |
| float GetSpillWeight() const { |
| return spill_weight_; |
| } |
| |
| bool RequiresColor() const { |
| return requires_color_; |
| } |
| |
| // We give extra weight to edges adjacent to pair nodes. See the general comment on the |
| // interference graph above. |
| size_t EdgeWeightWith(const InterferenceNode* other) const { |
| return (IsPair() || other->IsPair()) ? 2 : 1; |
| } |
| |
| bool NeedsSpillSlot() const { |
| return needs_spill_slot_; |
| } |
| |
| void SetNeedsSpillSlot() { |
| needs_spill_slot_ = true; |
| } |
| |
| // The current stage of this node, indicating which worklist it belongs to. |
| NodeStage stage; |
| |
| private: |
| // The live interval that this node represents. |
| LiveInterval* const interval_; |
| |
| // All nodes interfering with this one. |
| // We use an unsorted vector as a set, since a tree or hash set is too heavy for the |
| // set sizes that we encounter. Using a vector leads to much better performance. |
| ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. |
| |
| // Interference nodes that this node should be coalesced with to reduce moves. |
| ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. |
| |
| // The maximum number of colors with which this node could interfere. This could be more than |
| // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. |
| // We use "out" degree because incoming edges come from nodes already pruned from the graph, |
| // and do not affect the coloring of this node. |
| // Pre-colored nodes are treated as having infinite degree. |
| size_t out_degree_; |
| |
| // The node representing this node in the interference graph. |
| // Initially set to `this`, and only changed if this node is coalesced into another. |
| InterferenceNode* alias_; |
| |
| // The cost of splitting and spilling this interval to the stack. |
| // Nodes with a higher spill weight should be prioritized when assigning registers. |
| // This is essentially based on use density and location; short intervals with many uses inside |
| // deeply nested loops have a high spill weight. |
| const float spill_weight_; |
| |
| const bool requires_color_; |
| |
| bool needs_spill_slot_; |
| |
| DISALLOW_COPY_AND_ASSIGN(InterferenceNode); |
| }; |
| |
| // The order in which we color nodes is important. To guarantee forward progress, |
| // we prioritize intervals that require registers, and after that we prioritize |
| // short intervals. That way, if we fail to color a node, it either won't require a |
| // register, or it will be a long interval that can be split in order to make the |
| // interference graph sparser. |
| // To improve code quality, we prioritize intervals used frequently in deeply nested loops. |
| // (This metric is secondary to the forward progress requirements above.) |
| // TODO: May also want to consider: |
| // - Constants (since they can be rematerialized) |
| // - Allocated spill slots |
| static bool HasGreaterNodePriority(const InterferenceNode* lhs, |
| const InterferenceNode* rhs) { |
| // (1) Prioritize the node that requires a color. |
| if (lhs->RequiresColor() != rhs->RequiresColor()) { |
| return lhs->RequiresColor(); |
| } |
| |
| // (2) Prioritize the interval that has a higher spill weight. |
| return lhs->GetSpillWeight() > rhs->GetSpillWeight(); |
| } |
| |
| // A ColoringIteration holds the many data structures needed for a single graph coloring attempt, |
| // and provides methods for each phase of the attempt. |
| class ColoringIteration { |
| public: |
| ColoringIteration(RegisterAllocatorGraphColor* register_allocator, |
| ScopedArenaAllocator* allocator, |
| bool processing_core_regs, |
| size_t num_regs) |
| : register_allocator_(register_allocator), |
| allocator_(allocator), |
| processing_core_regs_(processing_core_regs), |
| num_regs_(num_regs), |
| interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), |
| coalesce_worklist_(CoalesceOpportunity::CmpPriority, |
| allocator->Adapter(kArenaAllocRegisterAllocator)), |
| adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} |
| |
| // Use the intervals collected from instructions to construct an |
| // interference graph mapping intervals to adjacency lists. |
| // Also, collect synthesized safepoint nodes, used to keep |
| // track of live intervals across safepoints. |
| // TODO: Should build safepoints elsewhere. |
| void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, |
| const ScopedArenaVector<InterferenceNode*>& physical_nodes); |
| |
| // Add coalesce opportunities to interference nodes. |
| void FindCoalesceOpportunities(); |
| |
| // Prune nodes from the interference graph to be colored later. Build |
| // a stack (pruned_nodes) containing these intervals in an order determined |
| // by various heuristics. |
| void PruneInterferenceGraph(); |
| |
| // Process pruned_intervals_ to color the interference graph, spilling when |
| // necessary. Returns true if successful. Else, some intervals have been |
| // split, and the interference graph should be rebuilt for another attempt. |
| bool ColorInterferenceGraph(); |
| |
| // Return prunable nodes. |
| // The register allocator will need to access prunable nodes after coloring |
| // in order to tell the code generator which registers have been assigned. |
| ArrayRef<InterferenceNode* const> GetPrunableNodes() const { |
| return ArrayRef<InterferenceNode* const>(prunable_nodes_); |
| } |
| |
| private: |
| // Create a coalesce opportunity between two nodes. |
| void CreateCoalesceOpportunity(InterferenceNode* a, |
| InterferenceNode* b, |
| CoalesceKind kind, |
| size_t position); |
| |
| // Add an edge in the interference graph, if valid. |
| // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion |
| // when possible. |
| void AddPotentialInterference(InterferenceNode* from, |
| InterferenceNode* to, |
| bool guaranteed_not_interfering_yet, |
| bool both_directions = true); |
| |
| // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) |
| // may be pruned from the interference graph. |
| void FreezeMoves(InterferenceNode* node); |
| |
| // Prune a node from the interference graph, updating worklists if necessary. |
| void PruneNode(InterferenceNode* node); |
| |
| // Add coalesce opportunities associated with this node to the coalesce worklist. |
| void EnableCoalesceOpportunities(InterferenceNode* node); |
| |
| // If needed, move `node` from the freeze worklist to the simplify worklist. |
| void CheckTransitionFromFreezeWorklist(InterferenceNode* node); |
| |
| // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. |
| bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into); |
| |
| // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. |
| bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into); |
| |
| void Coalesce(CoalesceOpportunity* opportunity); |
| |
| // Merge `from` into `into` in the interference graph. |
| void Combine(InterferenceNode* from, InterferenceNode* into); |
| |
| // A reference to the register allocator instance, |
| // needed to split intervals and assign spill slots. |
| RegisterAllocatorGraphColor* register_allocator_; |
| |
| // A scoped arena allocator used for a single graph coloring attempt. |
| ScopedArenaAllocator* allocator_; |
| |
| const bool processing_core_regs_; |
| |
| const size_t num_regs_; |
| |
| // A map from live intervals to interference nodes. |
| ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; |
| |
| // Uncolored nodes that should be pruned from the interference graph. |
| ScopedArenaVector<InterferenceNode*> prunable_nodes_; |
| |
| // A stack of nodes pruned from the interference graph, waiting to be colored. |
| ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; |
| |
| // A queue containing low degree, non-move-related nodes that can be pruned immediately. |
| ScopedArenaDeque<InterferenceNode*> simplify_worklist_; |
| |
| // A queue containing low degree, move-related nodes. |
| ScopedArenaDeque<InterferenceNode*> freeze_worklist_; |
| |
| // A queue containing high degree nodes. |
| // If we have to prune from the spill worklist, we cannot guarantee |
| // the pruned node a color, so we order the worklist by priority. |
| ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; |
| |
| // A queue containing coalesce opportunities. |
| // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those |
| // inside of loops) are more important than others. |
| ScopedArenaPriorityQueue<CoalesceOpportunity*, |
| decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; |
| |
| // Storage for links to adjacent nodes for interference nodes. |
| // Using std::deque so that elements do not move when adding new ones. |
| ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; |
| |
| // Storage for links to coalesce opportunities for interference nodes. |
| // Using std::deque so that elements do not move when adding new ones. |
| ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ColoringIteration); |
| }; |
| |
| static bool IsCoreInterval(LiveInterval* interval) { |
| return !DataType::IsFloatingPointType(interval->GetType()); |
| } |
| |
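| // Return the number of vreg-sized stack slots needed to hold an ArtMethod pointer for the |
| // target instruction set (e.g., two 4-byte vreg slots for a 64-bit pointer size). |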
| static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { |
| return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; |
| } |
| |
| RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, |
| CodeGenerator* codegen, |
| const SsaLivenessAnalysis& liveness, |
| bool iterative_move_coalescing) |
| : RegisterAllocator(allocator, codegen, liveness), |
| iterative_move_coalescing_(iterative_move_coalescing), |
| core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), |
| num_int_spill_slots_(0), |
| num_double_spill_slots_(0), |
| num_float_spill_slots_(0), |
| num_long_spill_slots_(0), |
| catch_phi_spill_slot_counter_(0), |
| reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), |
| reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { |
| // Before we ask for blocked registers, set them up in the code generator. |
| codegen->SetupBlockedRegisters(); |
| |
| // Initialize physical core register live intervals and blocked registers. |
| // This includes globally blocked registers, such as the stack pointer. |
| physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); |
| for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { |
| LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); |
| physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); |
| physical_core_nodes_[i]->stage = NodeStage::kPrecolored; |
| core_intervals_.push_back(interval); |
| if (codegen_->IsBlockedCoreRegister(i)) { |
| interval->AddRange(0, liveness.GetMaxLifetimePosition()); |
| } |
| } |
| // Initialize physical floating point register live intervals and blocked registers. |
| physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); |
| for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { |
| LiveInterval* interval = |
| LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); |
| physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); |
| physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; |
| fp_intervals_.push_back(interval); |
| if (codegen_->IsBlockedFloatingPointRegister(i)) { |
| interval->AddRange(0, liveness.GetMaxLifetimePosition()); |
| } |
| } |
| } |
| |
| RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} |
| |
| void RegisterAllocatorGraphColor::AllocateRegisters() { |
| // (1) Collect and prepare live intervals. |
| ProcessInstructions(); |
| |
| for (bool processing_core_regs : {true, false}) { |
| ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs |
| ? core_intervals_ |
| : fp_intervals_; |
| size_t num_registers = processing_core_regs |
| ? codegen_->GetNumberOfCoreRegisters() |
| : codegen_->GetNumberOfFloatingPointRegisters(); |
| |
| size_t attempt = 0; |
| while (true) { |
| ++attempt; |
| DCHECK_LE(attempt, kMaxGraphColoringAttemptsDebug) |
| << "Exceeded debug max graph coloring register allocation attempts. " |
| << "This could indicate that the register allocator is not making forward progress, " |
| << "which could be caused by prioritizing the wrong live intervals. (Short intervals " |
| << "should be prioritized over long ones, because they cannot be split further.)"; |
| |
| // Many data structures are cleared between graph coloring attempts, so we reduce |
| // total memory usage by using a new scoped arena allocator for each attempt. |
| ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); |
| ColoringIteration iteration(this, |
| &coloring_attempt_allocator, |
| processing_core_regs, |
| num_registers); |
| |
| // (2) Build the interference graph. |
| ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs |
| ? physical_core_nodes_ |
| : physical_fp_nodes_; |
| iteration.BuildInterferenceGraph(intervals, physical_nodes); |
| |
| // (3) Add coalesce opportunities. |
| // If we have tried coloring the graph a suspiciously high number of times, give |
| // up on move coalescing, just in case the coalescing heuristics are not conservative. |
| // (This situation will be caught if DCHECKs are turned on.) |
| if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { |
| iteration.FindCoalesceOpportunities(); |
| } |
| |
| // (4) Prune all uncolored nodes from interference graph. |
| iteration.PruneInterferenceGraph(); |
| |
| // (5) Color pruned nodes based on interferences. |
| bool successful = iteration.ColorInterferenceGraph(); |
| |
| // We manually clear coalesce opportunities for physical nodes, |
| // since they persist across coloring attempts. |
| for (InterferenceNode* node : physical_core_nodes_) { |
| node->ClearCoalesceOpportunities(); |
| } |
| for (InterferenceNode* node : physical_fp_nodes_) { |
| node->ClearCoalesceOpportunities(); |
| } |
| |
| if (successful) { |
| // Assign spill slots. |
| AllocateSpillSlots(iteration.GetPrunableNodes()); |
| |
| // Tell the code generator which registers were allocated. |
| // We only look at prunable_nodes because we already told the code generator about |
| // fixed intervals while processing instructions. We also ignore the fixed intervals |
| // placed at the top of catch blocks. |
| for (InterferenceNode* node : iteration.GetPrunableNodes()) { |
| LiveInterval* interval = node->GetInterval(); |
| if (interval->HasRegister()) { |
| Location low_reg = processing_core_regs |
| ? Location::RegisterLocation(interval->GetRegister()) |
| : Location::FpuRegisterLocation(interval->GetRegister()); |
| codegen_->AddAllocatedRegister(low_reg); |
| if (interval->HasHighInterval()) { |
| LiveInterval* high = interval->GetHighInterval(); |
| DCHECK(high->HasRegister()); |
| Location high_reg = processing_core_regs |
| ? Location::RegisterLocation(high->GetRegister()) |
| : Location::FpuRegisterLocation(high->GetRegister()); |
| codegen_->AddAllocatedRegister(high_reg); |
| } |
| } else { |
| DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister()); |
| } |
| } |
| |
| break; |
| } |
| } // while unsuccessful |
| } // for processing_core_regs |
| |
| // (6) Resolve locations and deconstruct SSA form. |
| RegisterAllocationResolver(codegen_, liveness_) |
| .Resolve(ArrayRef<HInstruction* const>(safepoints_), |
| reserved_art_method_slots_ + reserved_out_slots_, |
| num_int_spill_slots_, |
| num_long_spill_slots_, |
| num_float_spill_slots_, |
| num_double_spill_slots_, |
| catch_phi_spill_slot_counter_, |
| ArrayRef<LiveInterval* const>(temp_intervals_)); |
| |
| if (kIsDebugBuild) { |
| Validate(/*log_fatal_on_failure*/ true); |
| } |
| } |
| |
| bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { |
| for (bool processing_core_regs : {true, false}) { |
| ScopedArenaAllocator allocator(allocator_->GetArenaStack()); |
| ScopedArenaVector<LiveInterval*> intervals( |
| allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); |
| for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { |
| HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); |
| LiveInterval* interval = instruction->GetLiveInterval(); |
| if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) { |
| intervals.push_back(instruction->GetLiveInterval()); |
| } |
| } |
| |
| ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs |
| ? physical_core_nodes_ |
| : physical_fp_nodes_; |
| for (InterferenceNode* fixed : physical_nodes) { |
| LiveInterval* interval = fixed->GetInterval(); |
| if (interval->GetFirstRange() != nullptr) { |
| // Ideally we would check fixed ranges as well, but currently there are times when |
| // two fixed intervals for the same register will overlap. For example, a fixed input |
| // and a fixed output may sometimes share the same register, in which case there will be two |
| // fixed intervals for the same place. |
| } |
| } |
| |
| for (LiveInterval* temp : temp_intervals_) { |
| if (IsCoreInterval(temp) == processing_core_regs) { |
| intervals.push_back(temp); |
| } |
| } |
| |
| size_t spill_slots = num_int_spill_slots_ |
| + num_long_spill_slots_ |
| + num_float_spill_slots_ |
| + num_double_spill_slots_ |
| + catch_phi_spill_slot_counter_; |
| bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), |
| spill_slots, |
| reserved_art_method_slots_ + reserved_out_slots_, |
| *codegen_, |
| processing_core_regs, |
| log_fatal_on_failure); |
| if (!ok) { |
| return false; |
| } |
| } // for processing_core_regs |
| |
| return true; |
| } |
| |
| void RegisterAllocatorGraphColor::ProcessInstructions() { |
| for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { |
| // Note that we currently depend on this ordering, since some helper |
| // code is designed for linear scan register allocation. |
| for (HBackwardInstructionIterator instr_it(block->GetInstructions()); |
| !instr_it.Done(); |
| instr_it.Advance()) { |
| ProcessInstruction(instr_it.Current()); |
| } |
| |
| for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { |
| ProcessInstruction(phi_it.Current()); |
| } |
| |
| if (block->IsCatchBlock() |
| || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { |
| // By blocking all registers at the top of each catch block or irreducible loop, we force |
| // intervals belonging to the live-in set of the catch/header block to be spilled. |
| // TODO(ngeoffray): Phis in this block could be allocated in registers. |
| size_t position = block->GetLifetimeStart(); |
| BlockRegisters(position, position + 1); |
| } |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { |
| LocationSummary* locations = instruction->GetLocations(); |
| if (locations == nullptr) { |
| return; |
| } |
| if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) { |
| // We do this here because we do not want the suspend check to artificially |
| // create live registers. |
| DCHECK(instruction->IsSuspendCheckEntry()); |
| DCHECK_EQ(locations->GetTempCount(), 0u); |
| instruction->GetBlock()->RemoveInstruction(instruction); |
| return; |
| } |
| |
| CheckForTempLiveIntervals(instruction); |
| CheckForSafepoint(instruction); |
| if (instruction->GetLocations()->WillCall()) { |
| // If a call will happen, create fixed intervals for caller-save registers. |
| // TODO: Note that it may be beneficial to later split intervals at this point, |
| // so that we allow last-minute moves from a caller-save register |
| // to a callee-save register. |
| BlockRegisters(instruction->GetLifetimePosition(), |
| instruction->GetLifetimePosition() + 1, |
| /*caller_save_only*/ true); |
| } |
| CheckForFixedInputs(instruction); |
| |
| LiveInterval* interval = instruction->GetLiveInterval(); |
| if (interval == nullptr) { |
| // Instructions lacking a valid output location do not have a live interval. |
| DCHECK(!locations->Out().IsValid()); |
| return; |
| } |
| |
| // Low intervals act as representatives for their corresponding high interval. |
| DCHECK(!interval->IsHighInterval()); |
| if (codegen_->NeedsTwoRegisters(interval->GetType())) { |
| interval->AddHighInterval(); |
| } |
| AddSafepointsFor(instruction); |
| CheckForFixedOutput(instruction); |
| AllocateSpillSlotForCatchPhi(instruction); |
| |
| ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) |
| ? core_intervals_ |
| : fp_intervals_; |
| if (interval->HasSpillSlot() || instruction->IsConstant()) { |
| // Note that if an interval already has a spill slot, then its value currently resides |
| // on the stack (e.g., parameters). Thus we do not have to allocate a register until its first |
| // register use. This is also true for constants, which can be materialized at any point. |
| size_t first_register_use = interval->FirstRegisterUse(); |
| if (first_register_use != kNoLifetime) { |
| LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); |
| intervals.push_back(split); |
| } else { |
| // We won't allocate a register for this value. |
| } |
| } else { |
| intervals.push_back(interval); |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { |
| // We simply block physical registers where necessary. |
| // TODO: Ideally we would coalesce the physical register with the register |
| // allocated to the input value, but this can be tricky if, e.g., there |
| // could be multiple physical register uses of the same value at the |
| // same instruction. Furthermore, there's currently no distinction between |
| // fixed inputs to a call (which will be clobbered) and other fixed inputs (which |
| // may not be clobbered). |
| LocationSummary* locations = instruction->GetLocations(); |
| size_t position = instruction->GetLifetimePosition(); |
| for (size_t i = 0; i < locations->GetInputCount(); ++i) { |
| Location input = locations->InAt(i); |
| if (input.IsRegister() || input.IsFpuRegister()) { |
| BlockRegister(input, position, position + 1); |
| codegen_->AddAllocatedRegister(input); |
| } else if (input.IsPair()) { |
| BlockRegister(input.ToLow(), position, position + 1); |
| BlockRegister(input.ToHigh(), position, position + 1); |
| codegen_->AddAllocatedRegister(input.ToLow()); |
| codegen_->AddAllocatedRegister(input.ToHigh()); |
| } |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { |
| // If an instruction has a fixed output location, we give the live interval a register and then |
| // proactively split it just after the definition point to avoid creating too many interferences |
| // with a fixed node. |
| LiveInterval* interval = instruction->GetLiveInterval(); |
| Location out = interval->GetDefinedBy()->GetLocations()->Out(); |
| size_t position = instruction->GetLifetimePosition(); |
| DCHECK_GE(interval->GetEnd() - position, 2u); |
| |
| if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { |
| out = instruction->GetLocations()->InAt(0); |
| } |
| |
| if (out.IsRegister() || out.IsFpuRegister()) { |
| interval->SetRegister(out.reg()); |
| codegen_->AddAllocatedRegister(out); |
| Split(interval, position + 1); |
| } else if (out.IsPair()) { |
| interval->SetRegister(out.low()); |
| interval->GetHighInterval()->SetRegister(out.high()); |
| codegen_->AddAllocatedRegister(out.ToLow()); |
| codegen_->AddAllocatedRegister(out.ToHigh()); |
| Split(interval, position + 1); |
| } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { |
| interval->SetSpillSlot(out.GetStackIndex()); |
| } else { |
| DCHECK(out.IsUnallocated() || out.IsConstant()); |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { |
| LiveInterval* interval = instruction->GetLiveInterval(); |
| for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { |
| HInstruction* safepoint = safepoints_[safepoint_index - 1u]; |
| size_t safepoint_position = safepoint->GetLifetimePosition(); |
| |
| // Test that safepoints_ are ordered by decreasing lifetime position. |
| DCHECK(safepoint_index == safepoints_.size() || |
| safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); |
| |
| if (safepoint_position == interval->GetStart()) { |
| // The safepoint is for this instruction, so the location of the instruction |
| // does not need to be saved. |
| DCHECK_EQ(safepoint_index, safepoints_.size()); |
| DCHECK_EQ(safepoint, instruction); |
| continue; |
| } else if (interval->IsDeadAt(safepoint_position)) { |
| break; |
| } else if (!interval->Covers(safepoint_position)) { |
| // Hole in the interval. |
| continue; |
| } |
| interval->AddSafepoint(safepoint); |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { |
| LocationSummary* locations = instruction->GetLocations(); |
| size_t position = instruction->GetLifetimePosition(); |
| for (size_t i = 0; i < locations->GetTempCount(); ++i) { |
| Location temp = locations->GetTemp(i); |
| if (temp.IsRegister() || temp.IsFpuRegister()) { |
| BlockRegister(temp, position, position + 1); |
| codegen_->AddAllocatedRegister(temp); |
| } else { |
| DCHECK(temp.IsUnallocated()); |
| switch (temp.GetPolicy()) { |
| case Location::kRequiresRegister: { |
| LiveInterval* interval = |
| LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); |
| interval->AddTempUse(instruction, i); |
| core_intervals_.push_back(interval); |
| temp_intervals_.push_back(interval); |
| break; |
| } |
| |
| case Location::kRequiresFpuRegister: { |
| LiveInterval* interval = |
| LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); |
| interval->AddTempUse(instruction, i); |
| fp_intervals_.push_back(interval); |
| temp_intervals_.push_back(interval); |
| if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { |
| interval->AddHighInterval(/*is_temp*/ true); |
| temp_intervals_.push_back(interval->GetHighInterval()); |
| } |
| break; |
| } |
| |
| default: |
| LOG(FATAL) << "Unexpected policy for temporary location " |
| << temp.GetPolicy(); |
| } |
| } |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { |
| LocationSummary* locations = instruction->GetLocations(); |
| |
| if (locations->NeedsSafepoint()) { |
| safepoints_.push_back(instruction); |
| } |
| } |
| |
| LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { |
| if (interval->GetStart() < position && position < interval->GetEnd()) { |
| return Split(interval, position); |
| } else { |
| return interval; |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { |
| DCHECK(!interval->IsHighInterval()); |
| |
| // Split just after a register definition. |
| if (interval->IsParent() && interval->DefinitionRequiresRegister()) { |
| interval = TrySplit(interval, interval->GetStart() + 1); |
| } |
| |
| // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. |
| // [interval->GetStart(), interval->GetEnd() + 1) |
| auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), |
| interval->GetUses().end(), |
| interval->GetStart(), |
| interval->GetEnd() + 1u); |
| // Split around register uses. |
| for (const UsePosition& use : matching_use_range) { |
| if (use.RequiresRegister()) { |
| size_t position = use.GetPosition(); |
| interval = TrySplit(interval, position - 1); |
| if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { |
| // If we are at the very end of a basic block, we cannot split right |
| // at the use. Split just after instead. |
| interval = TrySplit(interval, position + 1); |
| } else { |
| interval = TrySplit(interval, position); |
| } |
| } |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { |
| if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { |
| HPhi* phi = instruction->AsPhi(); |
| LiveInterval* interval = phi->GetLiveInterval(); |
| |
| HInstruction* previous_phi = phi->GetPrevious(); |
| DCHECK(previous_phi == nullptr || |
| previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) |
| << "Phis expected to be sorted by vreg number, " |
| << "so that equivalent phis are adjacent."; |
| |
| if (phi->IsVRegEquivalentOf(previous_phi)) { |
| // Assign the same spill slot. |
| DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); |
| interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); |
| } else { |
| interval->SetSpillSlot(catch_phi_spill_slot_counter_); |
| catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); |
| } |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::BlockRegister(Location location, |
| size_t start, |
| size_t end) { |
| DCHECK(location.IsRegister() || location.IsFpuRegister()); |
| int reg = location.reg(); |
| LiveInterval* interval = location.IsRegister() |
| ? physical_core_nodes_[reg]->GetInterval() |
| : physical_fp_nodes_[reg]->GetInterval(); |
| DCHECK(interval->GetRegister() == reg); |
| bool blocked_by_codegen = location.IsRegister() |
| ? codegen_->IsBlockedCoreRegister(reg) |
| : codegen_->IsBlockedFloatingPointRegister(reg); |
| if (blocked_by_codegen) { |
| // We've already blocked this register for the entire method. (And adding a |
| // range inside another range violates the preconditions of AddRange). |
| } else { |
| interval->AddRange(start, end); |
| } |
| } |
| |
| void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { |
| for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { |
| if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { |
| BlockRegister(Location::RegisterLocation(i), start, end); |
| } |
| } |
| for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { |
| if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { |
| BlockRegister(Location::FpuRegisterLocation(i), start, end); |
| } |
| } |
| } |
| |
| void ColoringIteration::AddPotentialInterference(InterferenceNode* from, |
| InterferenceNode* to, |
| bool guaranteed_not_interfering_yet, |
| bool both_directions) { |
| if (from->IsPrecolored()) { |
| // We save space by ignoring outgoing edges from fixed nodes. |
| } else if (to->IsPrecolored()) { |
| // It is important that only a single node represents a given fixed register in the |
| // interference graph. We retrieve that node here. |
| const ScopedArenaVector<InterferenceNode*>& physical_nodes = |
| to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ |
| : register_allocator_->physical_core_nodes_; |
| InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; |
| from->AddInterference( |
| physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); |
| DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); |
| DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; |
| |
| // If a node interferes with a fixed pair node, the weight of the edge may |
| // be inaccurate after using the alias of the pair node, because the alias of the pair node |
| // is a singular node. |
| // We could make special pair fixed nodes, but that ends up being too conservative because |
| // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of |
| // three rather than two. |
| // Instead, we explicitly add an interference with the high node of the fixed pair node. |
| // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals |
| // can be unaligned on x86 complicates things. |
| if (to->IsPair()) { |
| InterferenceNode* high_node = |
| physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; |
| DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), |
| high_node->GetInterval()->GetRegister()); |
| from->AddInterference( |
| high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); |
| } |
| } else { |
| // Standard interference between two uncolored nodes. |
| from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); |
| } |
| |
| if (both_directions) { |
| AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); |
| } |
| } |
| |
| // Returns true if `in_node` represents an input interval of `out_node`, and the output interval |
| // is allowed to have the same register as the input interval. |
| // TODO: Ideally we should just produce correct intervals in liveness analysis. |
| // We would need to refactor the current live interval layout to do so, which is |
| // no small task. |
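| // For example, if an instruction's LocationSummary specifies kNoOutputOverlap and an input's |
| // interval is no longer live right after the definition, the output may reuse that input's |
| // register. |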
| static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { |
| LiveInterval* output_interval = out_node->GetInterval(); |
| HInstruction* defined_by = output_interval->GetDefinedBy(); |
| if (defined_by == nullptr) { |
| // This must not be a definition point. |
| return false; |
| } |
| |
| LocationSummary* locations = defined_by->GetLocations(); |
| if (locations->OutputCanOverlapWithInputs()) { |
| // This instruction does not allow the output to reuse a register from an input. |
| return false; |
| } |
| |
| LiveInterval* input_interval = in_node->GetInterval(); |
| LiveInterval* next_sibling = input_interval->GetNextSibling(); |
| size_t def_position = defined_by->GetLifetimePosition(); |
| size_t use_position = def_position + 1; |
| if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { |
| // The next sibling starts at the use position, so reusing the input register in the output |
| // would clobber the input before it's moved into the sibling interval location. |
| return false; |
| } |
| |
| if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { |
| // The input interval is live after the use position. |
| return false; |
| } |
| |
| HInputsRef inputs = defined_by->GetInputs(); |
| for (size_t i = 0; i < inputs.size(); ++i) { |
| if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { |
| DCHECK(input_interval->SameRegisterKind(*output_interval)); |
| return true; |
| } |
| } |
| |
| // The input interval was not an input for this instruction. |
| return false; |
| } |
| |
| void ColoringIteration::BuildInterferenceGraph( |
| const ScopedArenaVector<LiveInterval*>& intervals, |
| const ScopedArenaVector<InterferenceNode*>& physical_nodes) { |
| DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); |
| // Build the interference graph efficiently by ordering range endpoints |
| // by position and doing a linear sweep to find interferences. (That is, we |
| // jump from endpoint to endpoint, maintaining a set of intervals live at each |
| // point. If two nodes are ever in the live set at the same time, then they |
| // interfere with each other.) |
| // |
| // We order by both position and (secondarily) by whether the endpoint |
| // begins or ends a range; we want to process range endings before range |
| // beginnings at the same position because they should not conflict. |
| // |
| // For simplicity, we create a tuple for each endpoint, and then sort the tuples. |
| // Tuple contents: (position, is_range_beginning, node). |
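| // For example, (8, /*is_range_beginning*/ false, node_a) sorts before (8, true, node_b), so a |
| // range ending at position 8 is removed from the live set before a range beginning at 8 is |
| // added, and the two do not interfere. |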
| ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( |
| allocator_->Adapter(kArenaAllocRegisterAllocator)); |
| |
| // We reserve plenty of space to avoid excessive copying. |
| range_endpoints.reserve(4 * intervals.size()); |
| |
| for (LiveInterval* parent : intervals) { |
| for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { |
| LiveRange* range = sibling->GetFirstRange(); |
| if (range != nullptr) { |
| InterferenceNode* node = |
| new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); |
| interval_node_map_.insert(std::make_pair(sibling, node)); |
| |
| if (sibling->HasRegister()) { |
| // Fixed nodes should alias the canonical node for the corresponding register. |
| node->stage = NodeStage::kPrecolored; |
| InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; |
| node->SetAlias(physical_node); |
| DCHECK_EQ(node->GetInterval()->GetRegister(), |
| physical_node->GetInterval()->GetRegister()); |
| } else { |
| node->stage = NodeStage::kPrunable; |
| prunable_nodes_.push_back(node); |
| } |
| |
| while (range != nullptr) { |
| range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); |
| range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); |
| range = range->GetNext(); |
| } |
| } |
| } |
| } |
| |
| // Sort the endpoints. |
| // We explicitly ignore the third entry of each tuple (the node pointer) in order |
| // to maintain determinism. |
| std::sort(range_endpoints.begin(), range_endpoints.end(), |
| [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, |
| const std::tuple<size_t, bool, InterferenceNode*>& rhs) { |
| return std::tie(std::get<0>(lhs), std::get<1>(lhs)) |
| < std::tie(std::get<0>(rhs), std::get<1>(rhs)); |
| }); |
| |
| // Nodes live at the current position in the linear sweep. |
| ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); |
| |
| // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the |
| // live set. When we encounter the end of a range, we remove the corresponding node |
| // from the live set. Nodes interfere if they are in the live set at the same time. |
| for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { |
| bool is_range_beginning; |
| InterferenceNode* node; |
| size_t position; |
| // Extract information from the tuple, including the node this tuple represents. |
| std::tie(position, is_range_beginning, node) = *it; |
| |
| if (is_range_beginning) { |
| bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); |
| for (InterferenceNode* conflicting : live) { |
| DCHECK_NE(node, conflicting); |
| if (CheckInputOutputCanOverlap(conflicting, node)) { |
| // We do not add an interference, because the instruction represented by `node` allows |
| // its output to share a register with an input, represented here by `conflicting`. |
| } else { |
| AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); |
| } |
| } |
| DCHECK(std::find(live.begin(), live.end(), node) == live.end()); |
| live.push_back(node); |
| } else { |
| // End of range. |
| auto live_it = std::find(live.begin(), live.end(), node); |
| DCHECK(live_it != live.end()); |
| live.erase(live_it); |
| } |
| } |
| DCHECK(live.empty()); |
| } |
| |
| void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, |
| InterferenceNode* b, |
| CoalesceKind kind, |
| size_t position) { |
| DCHECK_EQ(a->IsPair(), b->IsPair()) |
| << "Nodes of different memory widths should never be coalesced"; |
| CoalesceOpportunity* opportunity = |
| new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); |
| a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); |
| b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); |
| coalesce_worklist_.push(opportunity); |
| } |
| |
| // When looking for coalesce opportunities, we use the interval_node_map_ to find the node |
| // corresponding to an interval. Note that not all intervals are in this map, notably the parents |
| // of constants and stack arguments. (However, these intervals should not be involved in coalesce |
| // opportunities anyway, because they're not going to be in registers.) |
| void ColoringIteration::FindCoalesceOpportunities() { |
| DCHECK(coalesce_worklist_.empty()); |
| |
| for (InterferenceNode* node : prunable_nodes_) { |
| LiveInterval* interval = node->GetInterval(); |
| |
| // Coalesce siblings. |
| LiveInterval* next_sibling = interval->GetNextSibling(); |
| if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { |
| auto it = interval_node_map_.find(next_sibling); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* sibling_node = it->second; |
| CreateCoalesceOpportunity(node, |
| sibling_node, |
| CoalesceKind::kAdjacentSibling, |
| interval->GetEnd()); |
| } |
| } |
| |
    // Coalesce a fixed output with this interval if this interval is the output's adjacent sibling.
| LiveInterval* parent = interval->GetParent(); |
| if (parent->HasRegister() |
| && parent->GetNextSibling() == interval |
| && parent->GetEnd() == interval->GetStart()) { |
| auto it = interval_node_map_.find(parent); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* parent_node = it->second; |
| CreateCoalesceOpportunity(node, |
| parent_node, |
| CoalesceKind::kFixedOutputSibling, |
| parent->GetEnd()); |
| } |
| } |
| |
| // Try to prevent moves across blocks. |
    // Note that this does not lead to many successful coalesce attempts, so it could be removed
    // if it is found to add to compile time.
| const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; |
| if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { |
      // If this interval starts at a block boundary, we look for the sibling live at the end
      // of each predecessor block. Coalescing with that sibling can avoid a move between the
      // two blocks.
| HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); |
| for (HBasicBlock* predecessor : block->GetPredecessors()) { |
| size_t position = predecessor->GetLifetimeEnd() - 1; |
| LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); |
| if (existing != nullptr) { |
| auto it = interval_node_map_.find(existing); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* existing_node = it->second; |
| CreateCoalesceOpportunity(node, |
| existing_node, |
| CoalesceKind::kNonlinearControlFlow, |
| position); |
| } |
| } |
| } |
| } |
| |
| // Coalesce phi inputs with the corresponding output. |
| HInstruction* defined_by = interval->GetDefinedBy(); |
| if (defined_by != nullptr && defined_by->IsPhi()) { |
| ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); |
| HInputsRef inputs = defined_by->GetInputs(); |
| |
| for (size_t i = 0, e = inputs.size(); i < e; ++i) { |
| // We want the sibling at the end of the appropriate predecessor block. |
| size_t position = predecessors[i]->GetLifetimeEnd() - 1; |
| LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); |
| |
| auto it = interval_node_map_.find(input_interval); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* input_node = it->second; |
| CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); |
| } |
| } |
| } |
| |
| // Coalesce output with first input when policy is kSameAsFirstInput. |
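    // For example (illustrative only): two-operand instructions on x86 typically require the
    // output to be in the same register as the first input, which is what kSameAsFirstInput
    // expresses; coalescing here avoids a copy into that register before the instruction.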
| if (defined_by != nullptr) { |
| Location out = defined_by->GetLocations()->Out(); |
| if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { |
| LiveInterval* input_interval |
| = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); |
| // TODO: Could we consider lifetime holes here? |
| if (input_interval->GetEnd() == interval->GetStart()) { |
| auto it = interval_node_map_.find(input_interval); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* input_node = it->second; |
| CreateCoalesceOpportunity(node, |
| input_node, |
| CoalesceKind::kFirstInput, |
| interval->GetStart()); |
| } |
| } |
| } |
| } |
| |
    // An interval that starts at its defining instruction (that is, it is not split) may
    // reuse the registers used by the inputs of that instruction, based on the
    // location summary.
| if (defined_by != nullptr) { |
| DCHECK(!interval->IsSplit()); |
| LocationSummary* locations = defined_by->GetLocations(); |
| if (!locations->OutputCanOverlapWithInputs()) { |
| HInputsRef inputs = defined_by->GetInputs(); |
| for (size_t i = 0; i < inputs.size(); ++i) { |
| size_t def_point = defined_by->GetLifetimePosition(); |
| // TODO: Getting the sibling at the def_point might not be quite what we want |
| // for fixed inputs, since the use will be *at* the def_point rather than after. |
| LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); |
| if (input_interval != nullptr && |
| input_interval->HasHighInterval() == interval->HasHighInterval()) { |
| auto it = interval_node_map_.find(input_interval); |
| if (it != interval_node_map_.end()) { |
| InterferenceNode* input_node = it->second; |
| CreateCoalesceOpportunity(node, |
| input_node, |
| CoalesceKind::kAnyInput, |
| interval->GetStart()); |
| } |
| } |
| } |
| } |
| } |
| |
| // Try to prevent moves into fixed input locations. |
| // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. |
| // [interval->GetStart() + 1, interval->GetEnd() + 1) |
| auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), |
| interval->GetUses().end(), |
| interval->GetStart() + 1u, |
| interval->GetEnd() + 1u); |
| for (const UsePosition& use : matching_use_range) { |
| HInstruction* user = use.GetUser(); |
| if (user == nullptr) { |
| // User may be null for certain intervals, such as temp intervals. |
| continue; |
| } |
| LocationSummary* locations = user->GetLocations(); |
| Location input = locations->InAt(use.GetInputIndex()); |
| if (input.IsRegister() || input.IsFpuRegister()) { |
| // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes |
| // is currently not supported. |
| InterferenceNode* fixed_node = input.IsRegister() |
| ? register_allocator_->physical_core_nodes_[input.reg()] |
| : register_allocator_->physical_fp_nodes_[input.reg()]; |
| CreateCoalesceOpportunity(node, |
| fixed_node, |
| CoalesceKind::kFixedInput, |
| user->GetLifetimePosition()); |
| } |
| } |
| } // for node in prunable_nodes |
| } |
| |
| static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { |
| return node->GetOutDegree() < num_regs; |
| } |
| |
| static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { |
| return !IsLowDegreeNode(node, num_regs); |
| } |
| |
| void ColoringIteration::PruneInterferenceGraph() { |
| DCHECK(pruned_nodes_.empty() |
| && simplify_worklist_.empty() |
| && freeze_worklist_.empty() |
| && spill_worklist_.empty()); |
| // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, |
| // and all others as high degree nodes. The distinction is important: low degree nodes are |
| // guaranteed a color, while high degree nodes are not. |
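  // For example, with num_regs_ == 4, a node with at most 3 neighbors can always be colored
  // after its neighbors, since those neighbors can occupy at most 3 of the 4 registers; a node
  // with 4 or more neighbors has no such guarantee.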
| |
| // Build worklists. Note that the coalesce worklist has already been |
| // filled by FindCoalesceOpportunities(). |
| for (InterferenceNode* node : prunable_nodes_) { |
| DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; |
| if (IsLowDegreeNode(node, num_regs_)) { |
| if (node->GetCoalesceOpportunities().empty()) { |
| // Simplify Worklist. |
| node->stage = NodeStage::kSimplifyWorklist; |
| simplify_worklist_.push_back(node); |
| } else { |
| // Freeze Worklist. |
| node->stage = NodeStage::kFreezeWorklist; |
| freeze_worklist_.push_back(node); |
| } |
| } else { |
| // Spill worklist. |
| node->stage = NodeStage::kSpillWorklist; |
| spill_worklist_.push(node); |
| } |
| } |
| |
| // Prune graph. |
  // Note that we do not remove a node from its current worklist if it moves to another, so it may
  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
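  // On each iteration we process one item from the highest-priority non-empty worklist, in the
  // order: simplify, then coalesce, then freeze, then spill. A potential spill candidate is
  // pruned only when no other progress can be made.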
| while (true) { |
| if (!simplify_worklist_.empty()) { |
| // Prune low-degree nodes. |
| // TODO: pop_back() should work as well, but it didn't; we get a |
| // failed check while pruning. We should look into this. |
| InterferenceNode* node = simplify_worklist_.front(); |
| simplify_worklist_.pop_front(); |
| DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; |
| DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; |
| DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; |
| PruneNode(node); |
| } else if (!coalesce_worklist_.empty()) { |
| // Coalesce. |
| CoalesceOpportunity* opportunity = coalesce_worklist_.top(); |
| coalesce_worklist_.pop(); |
| if (opportunity->stage == CoalesceStage::kWorklist) { |
| Coalesce(opportunity); |
| } |
| } else if (!freeze_worklist_.empty()) { |
| // Freeze moves and prune a low-degree move-related node. |
| InterferenceNode* node = freeze_worklist_.front(); |
| freeze_worklist_.pop_front(); |
| if (node->stage == NodeStage::kFreezeWorklist) { |
| DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; |
| DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; |
| FreezeMoves(node); |
| PruneNode(node); |
| } |
| } else if (!spill_worklist_.empty()) { |
      // We prune the lowest-priority node as a spill candidate; nodes pruned earlier are
      // colored later and are therefore more likely to actually be spilled.
| InterferenceNode* node = spill_worklist_.top(); |
| spill_worklist_.pop(); |
| if (node->stage == NodeStage::kSpillWorklist) { |
| DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; |
| FreezeMoves(node); |
| PruneNode(node); |
| } |
| } else { |
| // Pruning complete. |
| break; |
| } |
| } |
| DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); |
| } |
| |
| void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { |
| for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { |
| if (opportunity->stage == CoalesceStage::kActive) { |
| opportunity->stage = CoalesceStage::kWorklist; |
| coalesce_worklist_.push(opportunity); |
| } |
| } |
| } |
| |
| void ColoringIteration::PruneNode(InterferenceNode* node) { |
| DCHECK_NE(node->stage, NodeStage::kPruned); |
| DCHECK(!node->IsPrecolored()); |
| node->stage = NodeStage::kPruned; |
| pruned_nodes_.push(node); |
| |
| for (InterferenceNode* adj : node->GetAdjacentNodes()) { |
| DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; |
| |
| if (adj->IsPrecolored()) { |
| // No effect on pre-colored nodes; they're never pruned. |
| } else { |
| // Remove the interference. |
| bool was_high_degree = IsHighDegreeNode(adj, num_regs_); |
| DCHECK(adj->ContainsInterference(node)) |
| << "Missing reflexive interference from non-fixed node"; |
| adj->RemoveInterference(node); |
| |
| // Handle transitions from high degree to low degree. |
| if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { |
| EnableCoalesceOpportunities(adj); |
| for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { |
| EnableCoalesceOpportunities(adj_adj); |
| } |
| |
| DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); |
| if (adj->IsMoveRelated()) { |
| adj->stage = NodeStage::kFreezeWorklist; |
| freeze_worklist_.push_back(adj); |
| } else { |
| adj->stage = NodeStage::kSimplifyWorklist; |
| simplify_worklist_.push_back(adj); |
| } |
| } |
| } |
| } |
| } |
| |
| void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { |
| if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { |
| DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); |
| node->stage = NodeStage::kSimplifyWorklist; |
| simplify_worklist_.push_back(node); |
| } |
| } |
| |
| void ColoringIteration::FreezeMoves(InterferenceNode* node) { |
| for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { |
| if (opportunity->stage == CoalesceStage::kDefunct) { |
| // Constrained moves should remain constrained, since they will not be considered |
| // during last-chance coalescing. |
| } else { |
| opportunity->stage = CoalesceStage::kInactive; |
| } |
| InterferenceNode* other = opportunity->node_a->GetAlias() == node |
| ? opportunity->node_b->GetAlias() |
| : opportunity->node_a->GetAlias(); |
| if (other != node && other->stage == NodeStage::kFreezeWorklist) { |
| DCHECK(IsLowDegreeNode(node, num_regs_)); |
| CheckTransitionFromFreezeWorklist(other); |
| } |
| } |
| } |
| |
| bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, |
| InterferenceNode* into) { |
| if (!into->IsPrecolored()) { |
| // The uncolored heuristic will cover this case. |
| return false; |
| } |
| if (from->IsPair() || into->IsPair()) { |
| // TODO: Merging from a pair node is currently not supported, since fixed pair nodes |
| // are currently represented as two single fixed nodes in the graph, and `into` is |
| // only one of them. (We may lose the implicit connections to the second one in a merge.) |
| return false; |
| } |
| |
| // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. |
| // Reasons an adjacent node `adj` can be "ok": |
| // (1) If `adj` is low degree, interference with `into` will not affect its existing |
| // colorable guarantee. (Notice that coalescing cannot increase its degree.) |
| // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). |
| // (3) If there's already an interference with `into`, coalescing will not add interferences. |
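  // (This is essentially the George coalescing criterion, applied to merges into pre-colored
  // nodes.)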
| for (InterferenceNode* adj : from->GetAdjacentNodes()) { |
| if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { |
| // Ok. |
| } else { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, |
| InterferenceNode* into) { |
| if (into->IsPrecolored()) { |
| // The pre-colored heuristic will handle this case. |
| return false; |
| } |
| |
  // Arbitrary cap to improve compile time. Tests show that this has negligible effect
  // on generated code.
| if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { |
| return false; |
| } |
| |
| // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors |
| // of high degree. (Low degree neighbors can be ignored, because they will eventually be |
| // pruned from the interference graph in the simplify stage.) |
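  // For example (illustrative only): with num_regs_ == 4, if the merged node would have
  // high-degree neighbors with total edge weight 3, then once all of its low-degree neighbors
  // have been pruned its degree is below 4, so it is itself guaranteed to be prunable and colored.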
| size_t high_degree_interferences = 0; |
| for (InterferenceNode* adj : from->GetAdjacentNodes()) { |
| if (IsHighDegreeNode(adj, num_regs_)) { |
| high_degree_interferences += from->EdgeWeightWith(adj); |
| } |
| } |
| for (InterferenceNode* adj : into->GetAdjacentNodes()) { |
| if (IsHighDegreeNode(adj, num_regs_)) { |
| if (from->ContainsInterference(adj)) { |
| // We've already counted this adjacent node. |
| // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that |
| // we should not have counted it at all. (This extends the textbook Briggs coalescing test, |
| // but remains conservative.) |
| if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { |
| high_degree_interferences -= from->EdgeWeightWith(adj); |
| } |
| } else { |
| high_degree_interferences += into->EdgeWeightWith(adj); |
| } |
| } |
| } |
| |
| return high_degree_interferences < num_regs_; |
| } |
| |
| void ColoringIteration::Combine(InterferenceNode* from, |
| InterferenceNode* into) { |
| from->SetAlias(into); |
| |
| // Add interferences. |
| for (InterferenceNode* adj : from->GetAdjacentNodes()) { |
| bool was_low_degree = IsLowDegreeNode(adj, num_regs_); |
| AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); |
| if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { |
| // This is a (temporary) transition to a high degree node. Its degree will decrease again |
| // when we prune `from`, but it's best to be consistent about the current worklist. |
| adj->stage = NodeStage::kSpillWorklist; |
| spill_worklist_.push(adj); |
| } |
| } |
| |
| // Add coalesce opportunities. |
| for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { |
| if (opportunity->stage != CoalesceStage::kDefunct) { |
| into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); |
| } |
| } |
| EnableCoalesceOpportunities(from); |
| |
| // Prune and update worklists. |
| PruneNode(from); |
| if (IsLowDegreeNode(into, num_regs_)) { |
| // Coalesce(...) takes care of checking for a transition to the simplify worklist. |
| DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); |
| } else if (into->stage == NodeStage::kFreezeWorklist) { |
| // This is a transition to a high degree node. |
| into->stage = NodeStage::kSpillWorklist; |
| spill_worklist_.push(into); |
| } else { |
| DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); |
| } |
| } |
| |
| void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { |
| InterferenceNode* from = opportunity->node_a->GetAlias(); |
| InterferenceNode* into = opportunity->node_b->GetAlias(); |
| DCHECK_NE(from->stage, NodeStage::kPruned); |
| DCHECK_NE(into->stage, NodeStage::kPruned); |
| |
| if (from->IsPrecolored()) { |
| // If we have one pre-colored node, make sure it's the `into` node. |
| std::swap(from, into); |
| } |
| |
| if (from == into) { |
| // These nodes have already been coalesced. |
| opportunity->stage = CoalesceStage::kDefunct; |
| CheckTransitionFromFreezeWorklist(from); |
| } else if (from->IsPrecolored() || from->ContainsInterference(into)) { |
| // These nodes interfere. |
| opportunity->stage = CoalesceStage::kDefunct; |
| CheckTransitionFromFreezeWorklist(from); |
| CheckTransitionFromFreezeWorklist(into); |
| } else if (PrecoloredHeuristic(from, into) |
| || UncoloredHeuristic(from, into)) { |
| // We can coalesce these nodes. |
| opportunity->stage = CoalesceStage::kDefunct; |
| Combine(from, into); |
| CheckTransitionFromFreezeWorklist(into); |
| } else { |
| // We cannot coalesce, but we may be able to later. |
| opportunity->stage = CoalesceStage::kActive; |
| } |
| } |
| |
| // Build a mask with a bit set for each register assigned to some |
| // interval in `intervals`. |
| template <typename Container> |
| static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { |
| std::bitset<kMaxNumRegs> conflict_mask; |
| for (InterferenceNode* adjacent : intervals) { |
| LiveInterval* conflicting = adjacent->GetInterval(); |
| if (conflicting->HasRegister()) { |
| conflict_mask.set(conflicting->GetRegister()); |
| if (conflicting->HasHighInterval()) { |
| DCHECK(conflicting->GetHighInterval()->HasRegister()); |
| conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); |
| } |
| } else { |
| DCHECK(!conflicting->HasHighInterval() |
| || !conflicting->GetHighInterval()->HasRegister()); |
| } |
| } |
| return conflict_mask; |
| } |
| |
| bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { |
| return processing_core_regs |
| ? !codegen_->IsCoreCalleeSaveRegister(reg) |
| : !codegen_->IsFloatingPointCalleeSaveRegister(reg); |
| } |
| |
| static bool RegisterIsAligned(size_t reg) { |
| return reg % 2 == 0; |
| } |
| |
| static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { |
| // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. |
| // Note that CTZ is undefined if all bits are 0, so we special-case it. |
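  // For example, a conflict mask of 0b0111 means registers 0-2 are taken; ~mask has its lowest
  // set bit at index 3, so CTZ returns 3, the first free register.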
| return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); |
| } |
| |
| bool ColoringIteration::ColorInterferenceGraph() { |
| DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; |
| ScopedArenaVector<LiveInterval*> colored_intervals( |
| allocator_->Adapter(kArenaAllocRegisterAllocator)); |
| bool successful = true; |
| |
| while (!pruned_nodes_.empty()) { |
| InterferenceNode* node = pruned_nodes_.top(); |
| pruned_nodes_.pop(); |
| LiveInterval* interval = node->GetInterval(); |
| size_t reg = 0; |
| |
| InterferenceNode* alias = node->GetAlias(); |
| if (alias != node) { |
| // This node was coalesced with another. |
| LiveInterval* alias_interval = alias->GetInterval(); |
| if (alias_interval->HasRegister()) { |
| reg = alias_interval->GetRegister(); |
| DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) |
| << "This node conflicts with the register it was coalesced with"; |
| } else { |
| DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " |
| << "Move coalescing was not conservative, causing a node to be coalesced " |
| << "with another node that could not be colored"; |
| if (interval->RequiresRegister()) { |
| successful = false; |
| } |
| } |
| } else { |
| // Search for free register(s). |
| std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); |
| if (interval->HasHighInterval()) { |
| // Note that the graph coloring allocator assumes that pair intervals are aligned here, |
| // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we |
        // change the alignment requirements here, we will have to update the algorithm (e.g.,
        // be more conservative about the weight of edges adjacent to pair nodes).
| while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { |
| reg += 2; |
| } |
| |
| // Try to use a caller-save register first. |
| for (size_t i = 0; i < num_regs_ - 1; i += 2) { |
| bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); |
| bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); |
| if (!conflict_mask[i] && !conflict_mask[i + 1]) { |
| if (low_caller_save && high_caller_save) { |
| reg = i; |
| break; |
| } else if (low_caller_save || high_caller_save) { |
| reg = i; |
| // Keep looking to try to get both parts in caller-save registers. |
| } |
| } |
| } |
| } else { |
| // Not a pair interval. |
| reg = FindFirstZeroInConflictMask(conflict_mask); |
| |
| // Try to use caller-save registers first. |
| for (size_t i = 0; i < num_regs_; ++i) { |
| if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { |
| reg = i; |
| break; |
| } |
| } |
| } |
| |
| // Last-chance coalescing. |
| for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { |
| if (opportunity->stage == CoalesceStage::kDefunct) { |
| continue; |
| } |
| LiveInterval* other_interval = opportunity->node_a->GetAlias() == node |
| ? opportunity->node_b->GetAlias()->GetInterval() |
| : opportunity->node_a->GetAlias()->GetInterval(); |
| if (other_interval->HasRegister()) { |
| size_t coalesce_register = other_interval->GetRegister(); |
| if (interval->HasHighInterval()) { |
| if (!conflict_mask[coalesce_register] && |
| !conflict_mask[coalesce_register + 1] && |
| RegisterIsAligned(coalesce_register)) { |
| reg = coalesce_register; |
| break; |
| } |
| } else if (!conflict_mask[coalesce_register]) { |
| reg = coalesce_register; |
| break; |
| } |
| } |
| } |
| } |
| |
| if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { |
| // Assign register. |
| DCHECK(!interval->HasRegister()); |
| interval->SetRegister(reg); |
| colored_intervals.push_back(interval); |
| if (interval->HasHighInterval()) { |
| DCHECK(!interval->GetHighInterval()->HasRegister()); |
| interval->GetHighInterval()->SetRegister(reg + 1); |
| colored_intervals.push_back(interval->GetHighInterval()); |
| } |
| } else if (interval->RequiresRegister()) { |
| // The interference graph is too dense to color. Make it sparser by |
| // splitting this live interval. |
| successful = false; |
| register_allocator_->SplitAtRegisterUses(interval); |
      // We continue coloring, because there may be additional intervals that cannot
      // be colored and should also be split.
| } else { |
| // Spill. |
| node->SetNeedsSpillSlot(); |
| } |
| } |
| |
| // If unsuccessful, reset all register assignments. |
| if (!successful) { |
| for (LiveInterval* interval : colored_intervals) { |
| interval->ClearRegister(); |
| } |
| } |
| |
| return successful; |
| } |
| |
| void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { |
| // The register allocation resolver will organize the stack based on value type, |
| // so we assign stack slots for each value type separately. |
| ScopedArenaAllocator allocator(allocator_->GetArenaStack()); |
| ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); |
| ScopedArenaVector<LiveInterval*> double_intervals(adapter); |
| ScopedArenaVector<LiveInterval*> long_intervals(adapter); |
| ScopedArenaVector<LiveInterval*> float_intervals(adapter); |
| ScopedArenaVector<LiveInterval*> int_intervals(adapter); |
| |
| // The set of parent intervals already handled. |
| ScopedArenaSet<LiveInterval*> seen(adapter); |
| |
| // Find nodes that need spill slots. |
| for (InterferenceNode* node : nodes) { |
| if (!node->NeedsSpillSlot()) { |
| continue; |
| } |
| |
| LiveInterval* parent = node->GetInterval()->GetParent(); |
| if (seen.find(parent) != seen.end()) { |
| // We've already handled this interval. |
| // This can happen if multiple siblings of the same interval request a stack slot. |
| continue; |
| } |
| seen.insert(parent); |
| |
| HInstruction* defined_by = parent->GetDefinedBy(); |
| if (parent->HasSpillSlot()) { |
| // We already have a spill slot for this value that we can reuse. |
| } else if (defined_by->IsParameterValue()) { |
| // Parameters already have a stack slot. |
| parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); |
| } else if (defined_by->IsCurrentMethod()) { |
| // The current method is always at stack slot 0. |
| parent->SetSpillSlot(0); |
| } else if (defined_by->IsConstant()) { |
| // Constants don't need a spill slot. |
| } else { |
| // We need to find a spill slot for this interval. Place it in the correct |
| // worklist to be processed later. |
| switch (node->GetInterval()->GetType()) { |
| case DataType::Type::kFloat64: |
| double_intervals.push_back(parent); |
| break; |
| case DataType::Type::kInt64: |
| long_intervals.push_back(parent); |
| break; |
| case DataType::Type::kFloat32: |
| float_intervals.push_back(parent); |
| break; |
| case DataType::Type::kReference: |
| case DataType::Type::kInt32: |
| case DataType::Type::kUint16: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kBool: |
| case DataType::Type::kInt16: |
| int_intervals.push_back(parent); |
| break; |
| case DataType::Type::kUint32: |
| case DataType::Type::kUint64: |
| case DataType::Type::kVoid: |
| LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); |
| UNREACHABLE(); |
| } |
| } |
| } |
| |
| // Color spill slots for each value type. |
| ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); |
| ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); |
| ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); |
| ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); |
| } |
| |
| void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, |
| /* out */ size_t* num_stack_slots_used) { |
| // We cannot use the original interference graph here because spill slots are assigned to |
| // all of the siblings of an interval, whereas an interference node represents only a single |
| // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints |
| // by position, and assigning the lowest spill slot available when we encounter an interval |
| // beginning. We ignore lifetime holes for simplicity. |
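  // For example (illustrative only): with parent intervals A = [0, 20), B = [8, 30), and
  // C = [22, 40), each needing one slot, the sweep assigns slot 0 to A, slot 1 to B (slot 0 is
  // taken at position 8), and slot 0 again to C (A has ended by position 22), for a total of
  // two stack slots.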
| ScopedArenaAllocator allocator(allocator_->GetArenaStack()); |
| ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( |
| allocator.Adapter(kArenaAllocRegisterAllocator)); |
| |
| for (LiveInterval* parent_interval : intervals) { |
| DCHECK(parent_interval->IsParent()); |
| DCHECK(!parent_interval->HasSpillSlot()); |
| size_t start = parent_interval->GetStart(); |
| size_t end = parent_interval->GetLastSibling()->GetEnd(); |
| DCHECK_LT(start, end); |
| interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); |
| interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); |
| } |
| |
| // Sort by position. |
| // We explicitly ignore the third entry of each tuple (the interval pointer) in order |
| // to maintain determinism. |
| std::sort(interval_endpoints.begin(), interval_endpoints.end(), |
| [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, |
| const std::tuple<size_t, bool, LiveInterval*>& rhs) { |
| return std::tie(std::get<0>(lhs), std::get<1>(lhs)) |
| < std::tie(std::get<0>(rhs), std::get<1>(rhs)); |
| }); |
| |
| ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); |
| for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { |
| // Extract information from the current tuple. |
| LiveInterval* parent_interval; |
| bool is_interval_beginning; |
| size_t position; |
| std::tie(position, is_interval_beginning, parent_interval) = *it; |
| size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); |
| |
| if (is_interval_beginning) { |
| DCHECK(!parent_interval->HasSpillSlot()); |
| DCHECK_EQ(position, parent_interval->GetStart()); |
| |
| // Find first available free stack slot(s). |
| size_t slot = 0; |
| for (; ; ++slot) { |
| bool found = true; |
| for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { |
| if (taken.IsBitSet(s)) { |
| found = false; |
| break; // failure |
| } |
| } |
| if (found) { |
| break; // success |
| } |
| } |
| |
| parent_interval->SetSpillSlot(slot); |
| |
| *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); |
| if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { |
| // The parallel move resolver requires that there be an even number of spill slots |
| // allocated for pair value types. |
| ++(*num_stack_slots_used); |
| } |
| |
| for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { |
| taken.SetBit(s); |
| } |
| } else { |
| DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); |
| DCHECK(parent_interval->HasSpillSlot()); |
| |
| // Free up the stack slot(s) used by this interval. |
| size_t slot = parent_interval->GetSpillSlot(); |
| for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { |
| DCHECK(taken.IsBitSet(s)); |
| taken.ClearBit(s); |
| } |
| } |
| } |
| DCHECK_EQ(taken.NumSetBits(), 0u); |
| } |
| |
| } // namespace art |