1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "register_allocator_graph_color.h"
18
19#include "code_generator.h"
20#include "register_allocation_resolver.h"
21#include "ssa_liveness_analysis.h"
22#include "thread-inl.h"
23
24namespace art {
25
26// Highest number of registers that we support for any platform. This can be used for std::bitset,
27// for example, which needs to know its size at compile time.
28static constexpr size_t kMaxNumRegs = 32;
29
30// The maximum number of graph coloring attempts before triggering a DCHECK.
31// This is meant to catch changes to the graph coloring algorithm that undermine its forward
32// progress guarantees. Forward progress for the algorithm means splitting live intervals on
33// every graph coloring attempt so that eventually the interference graph will be sparse enough
34// to color. The main threat to forward progress is trying to split short intervals which cannot be
35// split further; this could cause infinite looping because the interference graph would never
36// change. This is avoided by prioritizing short intervals before long ones, so that long
37// intervals are split when coloring fails.
38static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
39
40// We always want to avoid spilling inside loops.
41static constexpr size_t kLoopSpillWeightMultiplier = 10;
42
43// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
44static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
45
46// We avoid moves in blocks that dominate the exit block, since these blocks will
47// be executed on every path through the method.
48static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
49
50enum class CoalesceKind {
51 kAdjacentSibling, // Prevents moves at interval split points.
52 kFixedOutputSibling, // Prevents moves from a fixed output location.
53 kFixedInput, // Prevents moves into a fixed input location.
54 kNonlinearControlFlow, // Prevents moves between blocks.
55 kPhi, // Prevents phi resolution moves.
56 kFirstInput, // Prevents a single input move.
57 kAnyInput, // May lead to better instruction selection / smaller encodings.
58};
59
60std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
61 return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
62}
63
64static size_t LoopDepthAt(HBasicBlock* block) {
65 HLoopInformation* loop_info = block->GetLoopInformation();
66 size_t depth = 0;
67 while (loop_info != nullptr) {
68 ++depth;
69 loop_info = loop_info->GetPreHeader()->GetLoopInformation();
70 }
71 return depth;
72}
73
74// Return the runtime cost of inserting a move instruction at the specified location.
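// Worked example (illustrative, using the multipliers above): a move in a single-jump block at
// loop depth 2 that also dominates the exit block costs 1 * 2 * 2 * 10 * 10 = 400, while a move
// in an ordinary block outside any loop costs 1.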
75static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
76 HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
77 DCHECK(block != nullptr);
78 size_t cost = 1;
79 if (block->IsSingleJump()) {
80 cost *= kSingleJumpBlockWeightMultiplier;
81 }
82 if (block->Dominates(block->GetGraph()->GetExitBlock())) {
83 cost *= kDominatesExitBlockWeightMultiplier;
84 }
85 for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
86 cost *= kLoopSpillWeightMultiplier;
87 }
88 return cost;
89}
90
91// In general, we estimate coalesce priority by whether it will definitely avoid a move,
92// and by how likely it is to create an interference graph that's harder to color.
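// For instance (illustrative), a kPhi opportunity inside a loop inherits the large move cost
// computed by CostForMoveAt, whereas kAnyInput opportunities always get priority 0 and are
// therefore drawn from the priority-ordered coalesce worklist last.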
93static size_t ComputeCoalescePriority(CoalesceKind kind,
94 size_t position,
95 const SsaLivenessAnalysis& liveness) {
96 if (kind == CoalesceKind::kAnyInput) {
97 // This type of coalescing can affect instruction selection, but not moves, so we
98 // give it the lowest priority.
99 return 0;
100 } else {
101 return CostForMoveAt(position, liveness);
102 }
103}
104
105enum class CoalesceStage {
106 kWorklist, // Currently in the iterative coalescing worklist.
107 kActive, // Not in a worklist, but could be considered again during iterative coalescing.
108 kInactive, // No longer considered until last-chance coalescing.
109 kDefunct, // Either the two nodes interfere, or have already been coalesced.
110};
111
112std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
113 return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
114}
115
116// Represents a coalesce opportunity between two nodes.
117struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
118 CoalesceOpportunity(InterferenceNode* a,
119 InterferenceNode* b,
120 CoalesceKind kind,
121 size_t position,
122 const SsaLivenessAnalysis& liveness)
123 : node_a(a),
124 node_b(b),
125 stage(CoalesceStage::kWorklist),
126 priority(ComputeCoalescePriority(kind, position, liveness)) {}
127
128 // Compare two coalesce opportunities based on their priority.
129 // Return true if lhs has a lower priority than that of rhs.
130 static bool CmpPriority(const CoalesceOpportunity* lhs,
131 const CoalesceOpportunity* rhs) {
132 return lhs->priority < rhs->priority;
133 }
134
135 InterferenceNode* const node_a;
136 InterferenceNode* const node_b;
137
138 // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
139 // and whether it should still be considered.
140 CoalesceStage stage;
141
142 // The priority of this coalesce opportunity, based on heuristics.
143 const size_t priority;
144};
145
146enum class NodeStage {
147 kInitial, // Uninitialized.
148 kPrecolored, // Marks fixed nodes.
149 kSafepoint, // Marks safepoint nodes.
150 kPrunable, // Marks uncolored nodes in the interference graph.
151 kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers.
152 kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers.
153 kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers.
154 kPruned // Marks nodes already pruned from the interference graph.
155};
156
157std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
158 return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
159}
160
161// Returns the estimated cost of spilling a particular live interval.
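// Worked example (illustrative): an interval of length 20 with two register uses costing 10
// each gets weight (10 + 10) / 20 = 1.0f, while a length-2 interval with one use costing 10
// gets 10 / 2 = 5.0f and is thus prioritized for a register.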
162static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
163 if (interval->HasRegister()) {
164 // Intervals with a fixed register cannot be spilled.
165 return std::numeric_limits<float>::min();
166 }
167
168 size_t length = interval->GetLength();
169 if (length == 1) {
170 // Tiny intervals should have maximum priority, since they cannot be split any further.
171 return std::numeric_limits<float>::max();
172 }
173
174 size_t use_weight = 0;
175 if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
176 // Cost for spilling at a register definition point.
177 use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
178 }
179
180 UsePosition* use = interval->GetFirstUse();
181 while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
182 // Skip uses before the start of this live interval.
183 use = use->GetNext();
184 }
185
186 while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
187 if (use->GetUser() != nullptr && use->RequiresRegister()) {
188 // Cost for spilling at a register use point.
189 use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
190 }
191 use = use->GetNext();
192 }
193
194 // We divide by the length of the interval because we want to prioritize
195 // short intervals; we do not benefit much if we split them further.
196 return static_cast<float>(use_weight) / static_cast<float>(length);
197}
198
199// Interference nodes make up the interference graph, which is the primary data structure in
200// graph coloring register allocation. Each node represents a single live interval, and contains
201// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
202// pre-colored nodes never contain outgoing edges (only incoming ones).
203//
204// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
205// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
206//
207// Note that a pair interval is represented by a single node in the interference graph, which
208// essentially requires two colors. One consequence of this is that the degree of a node is not
209// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
210// number of colors with which a node could interfere. We model this by giving edges different
211// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
212// For example, the edge between two single nodes will have weight 1. On the other hand,
213// the edge between a single node and a pair node will have weight 2. This is because the pair
214// node could block up to two colors for the single node, and because the single node could
215// block an entire two-register aligned slot for the pair node.
216// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
217// and thus whether it is safe to prune it from the interference graph early on.
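// For example (illustrative): a single-register node adjacent to one other single node and to
// one pair node has degree 1 + 2 = 3; with only three registers available it is not yet
// guaranteed a color, even though it has just two neighbors.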
218class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
219 public:
220 InterferenceNode(ArenaAllocator* allocator,
221 LiveInterval* interval,
222 const SsaLivenessAnalysis& liveness)
223 : stage(NodeStage::kInitial),
224 interval_(interval),
225 adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
226 coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
227 out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
228 alias_(this),
229 spill_weight_(ComputeSpillWeight(interval, liveness)),
230 requires_color_(interval->RequiresRegister()),
231 needs_spill_slot_(false) {
232 DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
233 }
234
235 void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
236 DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
237 DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
238 DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
239 DCHECK_NE(stage, NodeStage::kPruned);
240 DCHECK_NE(other->stage, NodeStage::kPruned);
241 if (guaranteed_not_interfering_yet) {
242 DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
243 == adjacent_nodes_.end());
244 adjacent_nodes_.push_back(other);
245 out_degree_ += EdgeWeightWith(other);
246 } else {
247 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
248 if (it == adjacent_nodes_.end()) {
249 adjacent_nodes_.push_back(other);
250 out_degree_ += EdgeWeightWith(other);
251 }
252 }
253 }
254
255 void RemoveInterference(InterferenceNode* other) {
256 DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
257 DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
258 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
259 if (it != adjacent_nodes_.end()) {
260 adjacent_nodes_.erase(it);
261 out_degree_ -= EdgeWeightWith(other);
262 }
263 }
264
265 bool ContainsInterference(InterferenceNode* other) const {
266 DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
267 DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
268 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
269 return it != adjacent_nodes_.end();
270 }
271
272 LiveInterval* GetInterval() const {
273 return interval_;
274 }
275
276 const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
277 return adjacent_nodes_;
278 }
279
280 size_t GetOutDegree() const {
281 // Pre-colored nodes have infinite degree.
282 DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
283 return out_degree_;
284 }
285
286 void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
287 coalesce_opportunities_.push_back(opportunity);
288 }
289
290 void ClearCoalesceOpportunities() {
291 coalesce_opportunities_.clear();
292 }
293
294 bool IsMoveRelated() const {
295 for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
296 if (opportunity->stage == CoalesceStage::kWorklist ||
297 opportunity->stage == CoalesceStage::kActive) {
298 return true;
299 }
300 }
301 return false;
302 }
303
304 // Return whether this node already has a color.
305 // Used to find fixed nodes in the interference graph before coloring.
306 bool IsPrecolored() const {
307 return interval_->HasRegister();
308 }
309
310 bool IsPair() const {
311 return interval_->HasHighInterval();
312 }
313
314 void SetAlias(InterferenceNode* rep) {
315 DCHECK_NE(rep->stage, NodeStage::kPruned);
316 DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
317 alias_ = rep;
318 }
319
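  // Illustrative: after coalescing a into b and then b into c, a->GetAlias() returns c and
  // rewrites a's alias pointer directly to c, so later lookups skip the intermediate node.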
320 InterferenceNode* GetAlias() {
321 if (alias_ != this) {
322 // Recurse in order to flatten tree of alias pointers.
323 alias_ = alias_->GetAlias();
324 }
325 return alias_;
326 }
327
328 const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
329 return coalesce_opportunities_;
330 }
331
332 float GetSpillWeight() const {
333 return spill_weight_;
334 }
335
336 bool RequiresColor() const {
337 return requires_color_;
338 }
339
340 // We give extra weight to edges adjacent to pair nodes. See the general comment on the
341 // interference graph above.
342 size_t EdgeWeightWith(const InterferenceNode* other) const {
343 return (IsPair() || other->IsPair()) ? 2 : 1;
344 }
345
346 bool NeedsSpillSlot() const {
347 return needs_spill_slot_;
348 }
349
350 void SetNeedsSpillSlot() {
351 needs_spill_slot_ = true;
352 }
353
354 // The current stage of this node, indicating which worklist it belongs to.
355 NodeStage stage;
356
357 private:
358 // The live interval that this node represents.
359 LiveInterval* const interval_;
360
361 // All nodes interfering with this one.
362 // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
363 // set sizes that we encounter. Using a vector leads to much better performance.
364 ArenaVector<InterferenceNode*> adjacent_nodes_;
365
366 // Interference nodes that this node should be coalesced with to reduce moves.
367 ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
368
369 // The maximum number of colors with which this node could interfere. This could be more than
370 // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
371 // We use "out" degree because incoming edges come from nodes already pruned from the graph,
372 // and do not affect the coloring of this node.
373 // Pre-colored nodes are treated as having infinite degree.
374 size_t out_degree_;
375
376 // The node representing this node in the interference graph.
377 // Initially set to `this`, and only changed if this node is coalesced into another.
378 InterferenceNode* alias_;
379
380 // The cost of splitting and spilling this interval to the stack.
381 // Nodes with a higher spill weight should be prioritized when assigning registers.
382 // This is essentially based on use density and location; short intervals with many uses inside
383 // deeply nested loops have a high spill weight.
384 const float spill_weight_;
385
386 const bool requires_color_;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700387
Matthew Gharrityb6722ff2016-08-12 19:07:11 -0700388 bool needs_spill_slot_;
389
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700390 DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
391};
392
393// The order in which we color nodes is important. To guarantee forward progress,
394// we prioritize intervals that require registers, and after that we prioritize
395// short intervals. That way, if we fail to color a node, it either won't require a
396// register, or it will be a long interval that can be split in order to make the
397// interference graph sparser.
398// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
399// (This metric is secondary to the forward progress requirements above.)
400// TODO: May also want to consider:
401// - Constants (since they can be rematerialized)
402// - Allocated spill slots
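// Note (illustrative): a node that requires a color therefore always outranks one that does
// not, regardless of spill weight; spill weight only orders nodes within those two groups.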
403static bool HasGreaterNodePriority(const InterferenceNode* lhs,
404 const InterferenceNode* rhs) {
405 // (1) Prioritize the node that requires a color.
406 if (lhs->RequiresColor() != rhs->RequiresColor()) {
407 return lhs->RequiresColor();
408 }
409
410 // (2) Prioritize the interval that has a higher spill weight.
411 return lhs->GetSpillWeight() > rhs->GetSpillWeight();
412}
413
414// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
415// and provides methods for each phase of the attempt.
416class ColoringIteration {
417 public:
418 ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
419 ArenaAllocator* allocator,
420 bool processing_core_regs,
421 size_t num_regs)
422 : register_allocator_(register_allocator),
423 allocator_(allocator),
424 processing_core_regs_(processing_core_regs),
425 num_regs_(num_regs),
426 interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
427 prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
428 pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
429 simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
430 freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
431 spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
432 coalesce_worklist_(CoalesceOpportunity::CmpPriority,
433 allocator->Adapter(kArenaAllocRegisterAllocator)) {}
434
435 // Use the intervals collected from instructions to construct an
436 // interference graph mapping intervals to adjacency lists.
437 // Also, collect synthesized safepoint nodes, used to keep
438 // track of live intervals across safepoints.
439 // TODO: Should build safepoints elsewhere.
440 void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
441 const ArenaVector<InterferenceNode*>& physical_nodes);
442
443 // Add coalesce opportunities to interference nodes.
444 void FindCoalesceOpportunities();
445
446 // Prune nodes from the interference graph to be colored later. Build
447 // a stack (pruned_nodes) containing these intervals in an order determined
448 // by various heuristics.
449 void PruneInterferenceGraph();
450
451 // Process pruned_nodes_ to color the interference graph, spilling when
452 // necessary. Returns true if successful. Else, some intervals have been
453 // split, and the interference graph should be rebuilt for another attempt.
454 bool ColorInterferenceGraph();
455
456 // Return prunable nodes.
457 // The register allocator will need to access prunable nodes after coloring
458 // in order to tell the code generator which registers have been assigned.
459 const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
460 return prunable_nodes_;
461 }
462
463 private:
464 // Create a coalesce opportunity between two nodes.
465 void CreateCoalesceOpportunity(InterferenceNode* a,
466 InterferenceNode* b,
467 CoalesceKind kind,
468 size_t position);
469
470 // Add an edge in the interference graph, if valid.
471 // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
472 // when possible.
473 void AddPotentialInterference(InterferenceNode* from,
474 InterferenceNode* to,
475 bool guaranteed_not_interfering_yet,
476 bool both_directions = true);
477
478 // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
479 // may be pruned from the interference graph.
480 void FreezeMoves(InterferenceNode* node);
481
482 // Prune a node from the interference graph, updating worklists if necessary.
483 void PruneNode(InterferenceNode* node);
484
485 // Add coalesce opportunities associated with this node to the coalesce worklist.
486 void EnableCoalesceOpportunities(InterferenceNode* node);
487
488 // If needed, move `node` from the freeze worklist to the simplify worklist.
489 void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
490
491 // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
492 bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
493
494 // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
495 bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
496
497 void Coalesce(CoalesceOpportunity* opportunity);
498
499 // Merge `from` into `into` in the interference graph.
500 void Combine(InterferenceNode* from, InterferenceNode* into);
501
502 // A reference to the register allocator instance,
503 // needed to split intervals and assign spill slots.
504 RegisterAllocatorGraphColor* register_allocator_;
505
506 // An arena allocator used for a single graph coloring attempt.
507 ArenaAllocator* allocator_;
508
509 const bool processing_core_regs_;
510
511 const size_t num_regs_;
512
513 // A map from live intervals to interference nodes.
514 ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
515
516 // Uncolored nodes that should be pruned from the interference graph.
517 ArenaVector<InterferenceNode*> prunable_nodes_;
518
519 // A stack of nodes pruned from the interference graph, waiting to be colored.
520 ArenaStdStack<InterferenceNode*> pruned_nodes_;
521
522 // A queue containing low degree, non-move-related nodes that can be pruned immediately.
523 ArenaDeque<InterferenceNode*> simplify_worklist_;
524
525 // A queue containing low degree, move-related nodes.
526 ArenaDeque<InterferenceNode*> freeze_worklist_;
527
528 // A queue containing high degree nodes.
529 // If we have to prune from the spill worklist, we cannot guarantee
530 // the pruned node a color, so we order the worklist by priority.
531 ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
532
533 // A queue containing coalesce opportunities.
534 // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
535 // inside of loops) are more important than others.
536 ArenaPriorityQueue<CoalesceOpportunity*,
537 decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
538
539 DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
540};
541
542static bool IsCoreInterval(LiveInterval* interval) {
543 return !Primitive::IsFloatingPointType(interval->GetType());
544}
545
546static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
547 return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
548}
549
550RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
551 CodeGenerator* codegen,
552 const SsaLivenessAnalysis& liveness,
553 bool iterative_move_coalescing)
554 : RegisterAllocator(allocator, codegen, liveness),
555 iterative_move_coalescing_(iterative_move_coalescing),
556 core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
557 fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
558 temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
559 safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
560 physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
561 physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
562 num_int_spill_slots_(0),
563 num_double_spill_slots_(0),
564 num_float_spill_slots_(0),
565 num_long_spill_slots_(0),
566 catch_phi_spill_slot_counter_(0),
567 reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
568 reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
569 // Before we ask for blocked registers, set them up in the code generator.
570 codegen->SetupBlockedRegisters();
571
572 // Initialize physical core register live intervals and blocked registers.
573 // This includes globally blocked registers, such as the stack pointer.
574 physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
575 for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
576 LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
577 physical_core_nodes_[i] =
578 new (allocator_) InterferenceNode(allocator_, interval, liveness);
579 physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
580 core_intervals_.push_back(interval);
581 if (codegen_->IsBlockedCoreRegister(i)) {
582 interval->AddRange(0, liveness.GetMaxLifetimePosition());
583 }
584 }
585 // Initialize physical floating point register live intervals and blocked registers.
586 physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
587 for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
588 LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
589 physical_fp_nodes_[i] =
590 new (allocator_) InterferenceNode(allocator_, interval, liveness);
591 physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
592 fp_intervals_.push_back(interval);
593 if (codegen_->IsBlockedFloatingPointRegister(i)) {
594 interval->AddRange(0, liveness.GetMaxLifetimePosition());
595 }
596 }
597}
598
599void RegisterAllocatorGraphColor::AllocateRegisters() {
600 // (1) Collect and prepare live intervals.
601 ProcessInstructions();
602
603 for (bool processing_core_regs : {true, false}) {
604 ArenaVector<LiveInterval*>& intervals = processing_core_regs
605 ? core_intervals_
606 : fp_intervals_;
607 size_t num_registers = processing_core_regs
608 ? codegen_->GetNumberOfCoreRegisters()
609 : codegen_->GetNumberOfFloatingPointRegisters();
610
611 size_t attempt = 0;
612 while (true) {
613 ++attempt;
614 DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
615 << "Exceeded debug max graph coloring register allocation attempts. "
616 << "This could indicate that the register allocator is not making forward progress, "
617 << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
618 << "should be prioritized over long ones, because they cannot be split further.)";
619
620 // Many data structures are cleared between graph coloring attempts, so we reduce
621 // total memory usage by using a new arena allocator for each attempt.
622 ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
623 ColoringIteration iteration(this,
624 &coloring_attempt_allocator,
625 processing_core_regs,
626 num_registers);
627
628 // (2) Build the interference graph. Also gather safepoints.
629 ArenaVector<InterferenceNode*> safepoints(
630 coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
631 ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
632 ? physical_core_nodes_
633 : physical_fp_nodes_;
634 iteration.BuildInterferenceGraph(intervals, physical_nodes);
635
636 // (3) Add coalesce opportunities.
637 // If we have tried coloring the graph a suspiciously high number of times, give
638 // up on move coalescing, just in case the coalescing heuristics are not conservative.
639 // (This situation will be caught if DCHECKs are turned on.)
640 if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
641 iteration.FindCoalesceOpportunities();
642 }
643
644 // (4) Prune all uncolored nodes from interference graph.
645 iteration.PruneInterferenceGraph();
646
647 // (5) Color pruned nodes based on interferences.
648 bool successful = iteration.ColorInterferenceGraph();
649
650 // We manually clear coalesce opportunities for physical nodes,
651 // since they persist across coloring attempts.
652 for (InterferenceNode* node : physical_core_nodes_) {
653 node->ClearCoalesceOpportunities();
654 }
655 for (InterferenceNode* node : physical_fp_nodes_) {
656 node->ClearCoalesceOpportunities();
657 }
658
659 if (successful) {
660 // Assign spill slots.
661 AllocateSpillSlots(iteration.GetPrunableNodes());
662
663 // Tell the code generator which registers were allocated.
664 // We only look at prunable_nodes because we already told the code generator about
665 // fixed intervals while processing instructions. We also ignore the fixed intervals
666 // placed at the top of catch blocks.
667 for (InterferenceNode* node : iteration.GetPrunableNodes()) {
668 LiveInterval* interval = node->GetInterval();
669 if (interval->HasRegister()) {
670 Location low_reg = processing_core_regs
671 ? Location::RegisterLocation(interval->GetRegister())
672 : Location::FpuRegisterLocation(interval->GetRegister());
673 codegen_->AddAllocatedRegister(low_reg);
674 if (interval->HasHighInterval()) {
675 LiveInterval* high = interval->GetHighInterval();
676 DCHECK(high->HasRegister());
677 Location high_reg = processing_core_regs
678 ? Location::RegisterLocation(high->GetRegister())
679 : Location::FpuRegisterLocation(high->GetRegister());
680 codegen_->AddAllocatedRegister(high_reg);
681 }
682 } else {
683 DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
684 }
685 }
686
687 break;
688 }
689 } // while unsuccessful
690 } // for processing_core_instructions
691
692 // (6) Resolve locations and deconstruct SSA form.
693 RegisterAllocationResolver(allocator_, codegen_, liveness_)
694 .Resolve(ArrayRef<HInstruction* const>(safepoints_),
695 reserved_art_method_slots_ + reserved_out_slots_,
696 num_int_spill_slots_,
697 num_long_spill_slots_,
698 num_float_spill_slots_,
699 num_double_spill_slots_,
700 catch_phi_spill_slot_counter_,
701 temp_intervals_);
702
703 if (kIsDebugBuild) {
704 Validate(/*log_fatal_on_failure*/ true);
705 }
706}
707
708bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
709 for (bool processing_core_regs : {true, false}) {
710 ArenaVector<LiveInterval*> intervals(
711 allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
712 for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
713 HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
714 LiveInterval* interval = instruction->GetLiveInterval();
715 if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
716 intervals.push_back(instruction->GetLiveInterval());
717 }
718 }
719
720 ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
721 ? physical_core_nodes_
722 : physical_fp_nodes_;
723 for (InterferenceNode* fixed : physical_nodes) {
724 LiveInterval* interval = fixed->GetInterval();
725 if (interval->GetFirstRange() != nullptr) {
726 // Ideally we would check fixed ranges as well, but currently there are times when
727 // two fixed intervals for the same register will overlap. For example, a fixed input
728 // and a fixed output may sometimes share the same register, in which case there will be two
729 // fixed intervals for the same place.
730 }
731 }
732
733 for (LiveInterval* temp : temp_intervals_) {
734 if (IsCoreInterval(temp) == processing_core_regs) {
735 intervals.push_back(temp);
736 }
737 }
738
739 size_t spill_slots = num_int_spill_slots_
740 + num_long_spill_slots_
741 + num_float_spill_slots_
742 + num_double_spill_slots_
743 + catch_phi_spill_slot_counter_;
744 bool ok = ValidateIntervals(intervals,
745 spill_slots,
746 reserved_art_method_slots_ + reserved_out_slots_,
747 *codegen_,
748 allocator_,
749 processing_core_regs,
750 log_fatal_on_failure);
751 if (!ok) {
752 return false;
753 }
754 } // for processing_core_regs
755
756 return true;
757}
758
759void RegisterAllocatorGraphColor::ProcessInstructions() {
760 for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
761 HBasicBlock* block = it.Current();
762
763 // Note that we currently depend on this ordering, since some helper
764 // code is designed for linear scan register allocation.
765 for (HBackwardInstructionIterator instr_it(block->GetInstructions());
766 !instr_it.Done();
767 instr_it.Advance()) {
768 ProcessInstruction(instr_it.Current());
769 }
770
771 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
772 ProcessInstruction(phi_it.Current());
773 }
774
775 if (block->IsCatchBlock()
776 || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
777 // By blocking all registers at the top of each catch block or irreducible loop, we force
778 // intervals belonging to the live-in set of the catch/header block to be spilled.
779 // TODO(ngeoffray): Phis in this block could be allocated in register.
780 size_t position = block->GetLifetimeStart();
781 BlockRegisters(position, position + 1);
782 }
783 }
784}
785
786void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
787 LocationSummary* locations = instruction->GetLocations();
788 if (locations == nullptr) {
789 return;
790 }
791 if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
792 // We do this here because we do not want the suspend check to artificially
793 // create live registers.
794 DCHECK(instruction->IsSuspendCheckEntry());
795 DCHECK_EQ(locations->GetTempCount(), 0u);
796 instruction->GetBlock()->RemoveInstruction(instruction);
797 return;
798 }
799
800 CheckForTempLiveIntervals(instruction);
801 CheckForSafepoint(instruction);
802 if (instruction->GetLocations()->WillCall()) {
803 // If a call will happen, create fixed intervals for caller-save registers.
804 // TODO: Note that it may be beneficial to later split intervals at this point,
805 // so that we allow last-minute moves from a caller-save register
806 // to a callee-save register.
807 BlockRegisters(instruction->GetLifetimePosition(),
808 instruction->GetLifetimePosition() + 1,
809 /*caller_save_only*/ true);
810 }
811 CheckForFixedInputs(instruction);
812
813 LiveInterval* interval = instruction->GetLiveInterval();
814 if (interval == nullptr) {
815 // Instructions lacking a valid output location do not have a live interval.
816 DCHECK(!locations->Out().IsValid());
817 return;
818 }
819
820 // Low intervals act as representatives for their corresponding high interval.
821 DCHECK(!interval->IsHighInterval());
822 if (codegen_->NeedsTwoRegisters(interval->GetType())) {
823 interval->AddHighInterval();
824 }
825 AddSafepointsFor(instruction);
826 CheckForFixedOutput(instruction);
827 AllocateSpillSlotForCatchPhi(instruction);
828
829 ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
830 ? core_intervals_
831 : fp_intervals_;
832 if (interval->HasSpillSlot() || instruction->IsConstant()) {
833 // Note that if an interval already has a spill slot, then its value currently resides
834 // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
835 // register use. This is also true for constants, which can be materialized at any point.
836 size_t first_register_use = interval->FirstRegisterUse();
837 if (first_register_use != kNoLifetime) {
838 LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
839 intervals.push_back(split);
840 } else {
841 // We won't allocate a register for this value.
842 }
843 } else {
844 intervals.push_back(interval);
845 }
846}
847
848void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
849 // We simply block physical registers where necessary.
850 // TODO: Ideally we would coalesce the physical register with the register
851 // allocated to the input value, but this can be tricky if, e.g., there
852 // could be multiple physical register uses of the same value at the
853 // same instruction. Furthermore, there's currently no distinction between
854 // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
855 // may not be clobbered).
856 LocationSummary* locations = instruction->GetLocations();
857 size_t position = instruction->GetLifetimePosition();
858 for (size_t i = 0; i < locations->GetInputCount(); ++i) {
859 Location input = locations->InAt(i);
860 if (input.IsRegister() || input.IsFpuRegister()) {
861 BlockRegister(input, position, position + 1);
862 codegen_->AddAllocatedRegister(input);
863 } else if (input.IsPair()) {
864 BlockRegister(input.ToLow(), position, position + 1);
865 BlockRegister(input.ToHigh(), position, position + 1);
866 codegen_->AddAllocatedRegister(input.ToLow());
867 codegen_->AddAllocatedRegister(input.ToHigh());
868 }
869 }
870}
871
872void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
873 // If an instruction has a fixed output location, we give the live interval a register and then
874 // proactively split it just after the definition point to avoid creating too many interferences
875 // with a fixed node.
876 LiveInterval* interval = instruction->GetLiveInterval();
877 Location out = interval->GetDefinedBy()->GetLocations()->Out();
878 size_t position = instruction->GetLifetimePosition();
879 DCHECK_GE(interval->GetEnd() - position, 2u);
880
881 if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
882 out = instruction->GetLocations()->InAt(0);
883 }
884
885 if (out.IsRegister() || out.IsFpuRegister()) {
886 interval->SetRegister(out.reg());
887 codegen_->AddAllocatedRegister(out);
888 Split(interval, position + 1);
889 } else if (out.IsPair()) {
890 interval->SetRegister(out.low());
891 interval->GetHighInterval()->SetRegister(out.high());
892 codegen_->AddAllocatedRegister(out.ToLow());
893 codegen_->AddAllocatedRegister(out.ToHigh());
894 Split(interval, position + 1);
895 } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
896 interval->SetSpillSlot(out.GetStackIndex());
897 } else {
898 DCHECK(out.IsUnallocated() || out.IsConstant());
899 }
900}
901
902void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
903 LiveInterval* interval = instruction->GetLiveInterval();
904 for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
905 HInstruction* safepoint = safepoints_[safepoint_index - 1u];
906 size_t safepoint_position = safepoint->GetLifetimePosition();
907
908 // Test that safepoints_ are ordered in the optimal way.
909 DCHECK(safepoint_index == safepoints_.size() ||
910 safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
911
912 if (safepoint_position == interval->GetStart()) {
913 // The safepoint is for this instruction, so the location of the instruction
914 // does not need to be saved.
915 DCHECK_EQ(safepoint_index, safepoints_.size());
916 DCHECK_EQ(safepoint, instruction);
917 continue;
918 } else if (interval->IsDeadAt(safepoint_position)) {
919 break;
920 } else if (!interval->Covers(safepoint_position)) {
921 // Hole in the interval.
922 continue;
923 }
924 interval->AddSafepoint(safepoint);
925 }
926}
927
928void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
929 LocationSummary* locations = instruction->GetLocations();
930 size_t position = instruction->GetLifetimePosition();
931 for (size_t i = 0; i < locations->GetTempCount(); ++i) {
932 Location temp = locations->GetTemp(i);
933 if (temp.IsRegister() || temp.IsFpuRegister()) {
934 BlockRegister(temp, position, position + 1);
935 codegen_->AddAllocatedRegister(temp);
936 } else {
937 DCHECK(temp.IsUnallocated());
938 switch (temp.GetPolicy()) {
939 case Location::kRequiresRegister: {
940 LiveInterval* interval =
941 LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
942 interval->AddTempUse(instruction, i);
943 core_intervals_.push_back(interval);
944 temp_intervals_.push_back(interval);
945 break;
946 }
947
948 case Location::kRequiresFpuRegister: {
949 LiveInterval* interval =
950 LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
951 interval->AddTempUse(instruction, i);
952 fp_intervals_.push_back(interval);
953 temp_intervals_.push_back(interval);
954 if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
955 interval->AddHighInterval(/*is_temp*/ true);
956 temp_intervals_.push_back(interval->GetHighInterval());
957 }
958 break;
959 }
960
961 default:
962 LOG(FATAL) << "Unexpected policy for temporary location "
963 << temp.GetPolicy();
964 }
965 }
966 }
967}
968
969void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
970 LocationSummary* locations = instruction->GetLocations();
971
972 if (locations->NeedsSafepoint()) {
973 safepoints_.push_back(instruction);
974 }
975}
976
977LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
978 if (interval->GetStart() < position && position < interval->GetEnd()) {
979 return Split(interval, position);
980 } else {
981 return interval;
982 }
983}
984
985void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
986 DCHECK(!interval->IsHighInterval());
987
988 // Split just after a register definition.
989 if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
990 interval = TrySplit(interval, interval->GetStart() + 1);
991 }
992
993 UsePosition* use = interval->GetFirstUse();
994 while (use != nullptr && use->GetPosition() < interval->GetStart()) {
995 use = use->GetNext();
996 }
997
998 // Split around register uses.
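 // Illustrative: a register use at position p yields splits at p - 1 and at p (or at p + 1 when
 // p lands on the control-flow instruction ending a block), leaving a short interval around the
 // use that will be heavily prioritized on the next coloring attempt.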
999 size_t end = interval->GetEnd();
1000 while (use != nullptr && use->GetPosition() <= end) {
1001 if (use->RequiresRegister()) {
1002 size_t position = use->GetPosition();
1003 interval = TrySplit(interval, position - 1);
1004 if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
1005 // If we are at the very end of a basic block, we cannot split right
1006 // at the use. Split just after instead.
1007 interval = TrySplit(interval, position + 1);
1008 } else {
1009 interval = TrySplit(interval, position);
1010 }
1011 }
1012 use = use->GetNext();
1013 }
1014}
1015
1016void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
1017 if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
1018 HPhi* phi = instruction->AsPhi();
1019 LiveInterval* interval = phi->GetLiveInterval();
1020
1021 HInstruction* previous_phi = phi->GetPrevious();
1022 DCHECK(previous_phi == nullptr ||
1023 previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
1024 << "Phis expected to be sorted by vreg number, "
1025 << "so that equivalent phis are adjacent.";
1026
1027 if (phi->IsVRegEquivalentOf(previous_phi)) {
1028 // Assign the same spill slot.
1029 DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
1030 interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
1031 } else {
1032 interval->SetSpillSlot(catch_phi_spill_slot_counter_);
1033 catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
1034 }
1035 }
1036}
1037
1038void RegisterAllocatorGraphColor::BlockRegister(Location location,
1039 size_t start,
1040 size_t end) {
1041 DCHECK(location.IsRegister() || location.IsFpuRegister());
1042 int reg = location.reg();
1043 LiveInterval* interval = location.IsRegister()
1044 ? physical_core_nodes_[reg]->GetInterval()
1045 : physical_fp_nodes_[reg]->GetInterval();
1046 DCHECK(interval->GetRegister() == reg);
1047 bool blocked_by_codegen = location.IsRegister()
1048 ? codegen_->IsBlockedCoreRegister(reg)
1049 : codegen_->IsBlockedFloatingPointRegister(reg);
1050 if (blocked_by_codegen) {
1051 // We've already blocked this register for the entire method. (And adding a
1052 // range inside another range violates the preconditions of AddRange).
1053 } else {
1054 interval->AddRange(start, end);
1055 }
1056}
1057
1058void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
1059 for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
1060 if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
1061 BlockRegister(Location::RegisterLocation(i), start, end);
1062 }
1063 }
1064 for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
1065 if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
1066 BlockRegister(Location::FpuRegisterLocation(i), start, end);
1067 }
1068 }
1069}
1070
1071void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
1072 InterferenceNode* to,
1073 bool guaranteed_not_interfering_yet,
1074 bool both_directions) {
1075 if (from->IsPrecolored()) {
1076 // We save space by ignoring outgoing edges from fixed nodes.
1077 } else if (to->IsPrecolored()) {
1078 // It is important that only a single node represents a given fixed register in the
1079 // interference graph. We retrieve that node here.
1080 const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
1081 ? register_allocator_->physical_fp_nodes_
1082 : register_allocator_->physical_core_nodes_;
1083 InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
1084 from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
1085 DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
1086 DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
1087
1088 // If a node interferes with a fixed pair node, the weight of the edge may
1089 // be inaccurate after using the alias of the pair node, because the alias of the pair node
1090 // is a singular node.
1091 // We could make special pair fixed nodes, but that ends up being too conservative because
1092 // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
1093 // three rather than two.
1094 // Instead, we explicitly add an interference with the high node of the fixed pair node.
1095 // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
1096 // can be unaligned on x86 complicates things.
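    // Illustrative: a single node interfering with the fixed pair {r2, r3} therefore gets an
    // edge to the canonical node for r2 and another to the node for r3, raising its degree by 2.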
1097 if (to->IsPair()) {
1098 InterferenceNode* high_node =
1099 physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
1100 DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
1101 high_node->GetInterval()->GetRegister());
1102 from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
1103 }
1104 } else {
1105 // Standard interference between two uncolored nodes.
1106 from->AddInterference(to, guaranteed_not_interfering_yet);
1107 }
1108
1109 if (both_directions) {
1110 AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
1111 }
1112}
1113
1114// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
1115// is allowed to have the same register as the input interval.
1116// TODO: Ideally we should just produce correct intervals in liveness analysis.
1117// We would need to refactor the current live interval layout to do so, which is
1118// no small task.
1119static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
1120 LiveInterval* output_interval = out_node->GetInterval();
1121 HInstruction* defined_by = output_interval->GetDefinedBy();
1122 if (defined_by == nullptr) {
1123 // This must not be a definition point.
1124 return false;
1125 }
1126
1127 LocationSummary* locations = defined_by->GetLocations();
1128 if (locations->OutputCanOverlapWithInputs()) {
1129 // This instruction does not allow the output to reuse a register from an input.
1130 return false;
1131 }
1132
1133 LiveInterval* input_interval = in_node->GetInterval();
1134 LiveInterval* next_sibling = input_interval->GetNextSibling();
1135 size_t def_position = defined_by->GetLifetimePosition();
1136 size_t use_position = def_position + 1;
1137 if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
1138 // The next sibling starts at the use position, so reusing the input register in the output
1139 // would clobber the input before it's moved into the sibling interval location.
1140 return false;
1141 }
1142
1143 if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
1144 // The input interval is live after the use position.
1145 return false;
1146 }
1147
1148 HInputsRef inputs = defined_by->GetInputs();
1149 for (size_t i = 0; i < inputs.size(); ++i) {
1150 if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
1151 DCHECK(input_interval->SameRegisterKind(*output_interval));
1152 return true;
1153 }
1154 }
1155
1156 // The input interval was not an input for this instruction.
1157 return false;
1158}
1159
1160void ColoringIteration::BuildInterferenceGraph(
1161 const ArenaVector<LiveInterval*>& intervals,
1162 const ArenaVector<InterferenceNode*>& physical_nodes) {
1163 DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
1164 // Build the interference graph efficiently by ordering range endpoints
1165 // by position and doing a linear sweep to find interferences. (That is, we
1166 // jump from endpoint to endpoint, maintaining a set of intervals live at each
1167 // point. If two nodes are ever in the live set at the same time, then they
1168 // interfere with each other.)
1169 //
1170 // We order by both position and (secondarily) by whether the endpoint
1171 // begins or ends a range; we want to process range endings before range
1172 // beginnings at the same position because they should not conflict.
1173 //
1174 // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
1175 // Tuple contents: (position, is_range_beginning, node).
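 // Illustrative sweep: for intervals A covering [2, 10) and B covering [6, 14), the sorted
 // endpoints are (2, begin, A), (6, begin, B), (10, end, A), (14, end, B); A is still live when
 // B begins, so an A/B interference edge is added at position 6.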
1176 ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
1177 allocator_->Adapter(kArenaAllocRegisterAllocator));
1178
1179 // We reserve plenty of space to avoid excessive copying.
1180 range_endpoints.reserve(4 * prunable_nodes_.size());
1181
1182 for (LiveInterval* parent : intervals) {
1183 for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
1184 LiveRange* range = sibling->GetFirstRange();
1185 if (range != nullptr) {
1186 InterferenceNode* node = new (allocator_) InterferenceNode(
1187 allocator_, sibling, register_allocator_->liveness_);
1188 interval_node_map_.Insert(std::make_pair(sibling, node));
1189
1190 if (sibling->HasRegister()) {
1191 // Fixed nodes should alias the canonical node for the corresponding register.
1192 node->stage = NodeStage::kPrecolored;
1193 InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
1194 node->SetAlias(physical_node);
1195 DCHECK_EQ(node->GetInterval()->GetRegister(),
1196 physical_node->GetInterval()->GetRegister());
1197 } else {
1198 node->stage = NodeStage::kPrunable;
1199 prunable_nodes_.push_back(node);
1200 }
1201
1202 while (range != nullptr) {
1203 range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
1204 range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
1205 range = range->GetNext();
1206 }
1207 }
1208 }
1209 }
1210
1211 // Sort the endpoints.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001212 // We explicitly ignore the third entry of each tuple (the node pointer) in order
1213 // to maintain determinism.
1214 std::sort(range_endpoints.begin(), range_endpoints.end(),
1215 [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
1216 const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
1217 return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1218 < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1219 });
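  // Because `false` orders before `true`, a range that ends at position p sorts ahead of a
  // range that begins at p, so two back-to-back ranges meeting at the same position do not
  // produce a spurious interference.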
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001220
1221 // Nodes live at the current position in the linear sweep.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001222 ArenaVector<InterferenceNode*> live(
1223 allocator_->Adapter(kArenaAllocRegisterAllocator));
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001224
1225 // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1226 // live set. When we encounter the end of a range, we remove the corresponding node
1227 // from the live set. Nodes interfere if they are in the live set at the same time.
1228 for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1229 bool is_range_beginning;
1230 InterferenceNode* node;
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001231 size_t position;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001232 // Extract information from the tuple, including the node this tuple represents.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001233 std::tie(position, is_range_beginning, node) = *it;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001234
1235 if (is_range_beginning) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001236 bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001237 for (InterferenceNode* conflicting : live) {
1238 DCHECK_NE(node, conflicting);
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001239 if (CheckInputOutputCanOverlap(conflicting, node)) {
1240 // We do not add an interference, because the instruction represented by `node` allows
1241 // its output to share a register with an input, represented here by `conflicting`.
1242 } else {
1243 AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1244 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001245 }
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001246 DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1247 live.push_back(node);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001248 } else {
1249 // End of range.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001250 auto live_it = std::find(live.begin(), live.end(), node);
1251 DCHECK(live_it != live.end());
1252 live.erase(live_it);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001253 }
1254 }
1255 DCHECK(live.empty());
1256}
1257
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001258void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1259 InterferenceNode* b,
1260 CoalesceKind kind,
1261 size_t position) {
1262 DCHECK_EQ(a->IsPair(), b->IsPair())
1263 << "Nodes of different memory widths should never be coalesced";
1264 CoalesceOpportunity* opportunity =
1265 new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1266 a->AddCoalesceOpportunity(opportunity);
1267 b->AddCoalesceOpportunity(opportunity);
1268 coalesce_worklist_.push(opportunity);
Matthew Gharrity465ed692016-07-22 08:52:13 -07001269}
1270
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001271// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1272// corresponding to an interval. Note that not all intervals are in this map, notably the parents
1272 // corresponding to an interval. Note that not all intervals are in this map, notably the parents
1274// opportunities anyway, because they're not going to be in registers.)
1275void ColoringIteration::FindCoalesceOpportunities() {
1276 DCHECK(coalesce_worklist_.empty());
1277
1278 for (InterferenceNode* node : prunable_nodes_) {
1279 LiveInterval* interval = node->GetInterval();
1280
1281    // Coalesce siblings.
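    // Adjacent siblings come from interval splitting; if both end up in the same register,
    // the move that would otherwise be needed at the split point can be elided.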
1282 LiveInterval* next_sibling = interval->GetNextSibling();
1283 if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1284 auto it = interval_node_map_.Find(next_sibling);
1285 if (it != interval_node_map_.end()) {
1286 InterferenceNode* sibling_node = it->second;
1287 CreateCoalesceOpportunity(node,
1288 sibling_node,
1289 CoalesceKind::kAdjacentSibling,
1290 interval->GetEnd());
1291 }
1292 }
1293
1294 // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1295 LiveInterval* parent = interval->GetParent();
1296 if (parent->HasRegister()
1297 && parent->GetNextSibling() == interval
1298 && parent->GetEnd() == interval->GetStart()) {
1299 auto it = interval_node_map_.Find(parent);
1300 if (it != interval_node_map_.end()) {
1301 InterferenceNode* parent_node = it->second;
1302 CreateCoalesceOpportunity(node,
1303 parent_node,
1304 CoalesceKind::kFixedOutputSibling,
1305 parent->GetEnd());
1306 }
1307 }
1308
1309 // Try to prevent moves across blocks.
1310    // Note that this does not lead to many successful coalesce attempts, so it could be removed
1311 // if found to add to compile time.
1312 const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1313 if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1314 // If the start of this interval is at a block boundary, we look at the
1315 // location of the interval in blocks preceding the block this interval
1316 // starts at. This can avoid a move between the two blocks.
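      // For example, if the value already sits in some register at the end of a predecessor
      // block, coalescing with that sibling lets this interval reuse the same register and
      // removes the resolution move on that control-flow edge.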
1317 HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1318 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1319 size_t position = predecessor->GetLifetimeEnd() - 1;
1320 LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1321 if (existing != nullptr) {
1322 auto it = interval_node_map_.Find(existing);
1323 if (it != interval_node_map_.end()) {
1324 InterferenceNode* existing_node = it->second;
1325 CreateCoalesceOpportunity(node,
1326 existing_node,
1327 CoalesceKind::kNonlinearControlFlow,
1328 position);
1329 }
1330 }
1331 }
1332 }
1333
1334 // Coalesce phi inputs with the corresponding output.
1335 HInstruction* defined_by = interval->GetDefinedBy();
1336 if (defined_by != nullptr && defined_by->IsPhi()) {
1337 const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
1338 HInputsRef inputs = defined_by->GetInputs();
1339
1340 for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1341 // We want the sibling at the end of the appropriate predecessor block.
1342 size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1343 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1344
1345 auto it = interval_node_map_.Find(input_interval);
1346 if (it != interval_node_map_.end()) {
1347 InterferenceNode* input_node = it->second;
1348 CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1349 }
1350 }
1351 }
1352
1353 // Coalesce output with first input when policy is kSameAsFirstInput.
1354 if (defined_by != nullptr) {
1355 Location out = defined_by->GetLocations()->Out();
1356 if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1357 LiveInterval* input_interval
1358 = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1359 // TODO: Could we consider lifetime holes here?
1360 if (input_interval->GetEnd() == interval->GetStart()) {
1361 auto it = interval_node_map_.Find(input_interval);
1362 if (it != interval_node_map_.end()) {
1363 InterferenceNode* input_node = it->second;
1364 CreateCoalesceOpportunity(node,
1365 input_node,
1366 CoalesceKind::kFirstInput,
1367 interval->GetStart());
1368 }
1369 }
1370 }
1371 }
1372
1373    // An interval that starts at an instruction (that is, one that is not split) may
1374 // re-use the registers used by the inputs of that instruction, based on the
1375 // location summary.
1376 if (defined_by != nullptr) {
1377 DCHECK(!interval->IsSplit());
1378 LocationSummary* locations = defined_by->GetLocations();
1379 if (!locations->OutputCanOverlapWithInputs()) {
1380 HInputsRef inputs = defined_by->GetInputs();
1381 for (size_t i = 0; i < inputs.size(); ++i) {
1382 size_t def_point = defined_by->GetLifetimePosition();
1383 // TODO: Getting the sibling at the def_point might not be quite what we want
1384 // for fixed inputs, since the use will be *at* the def_point rather than after.
1385 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1386 if (input_interval != nullptr &&
1387 input_interval->HasHighInterval() == interval->HasHighInterval()) {
1388 auto it = interval_node_map_.Find(input_interval);
1389 if (it != interval_node_map_.end()) {
1390 InterferenceNode* input_node = it->second;
1391 CreateCoalesceOpportunity(node,
1392 input_node,
1393 CoalesceKind::kAnyInput,
1394 interval->GetStart());
1395 }
1396 }
1397 }
1398 }
1399 }
1400
1401 // Try to prevent moves into fixed input locations.
1402 UsePosition* use = interval->GetFirstUse();
1403 for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
1404 // Skip past uses before the start of this interval.
1405 }
1406 for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
1407 HInstruction* user = use->GetUser();
1408 if (user == nullptr) {
1409 // User may be null for certain intervals, such as temp intervals.
1410 continue;
1411 }
1412 LocationSummary* locations = user->GetLocations();
1413 Location input = locations->InAt(use->GetInputIndex());
1414 if (input.IsRegister() || input.IsFpuRegister()) {
1415 // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
1416 // is currently not supported.
1417 InterferenceNode* fixed_node = input.IsRegister()
1418 ? register_allocator_->physical_core_nodes_[input.reg()]
1419 : register_allocator_->physical_fp_nodes_[input.reg()];
1420 CreateCoalesceOpportunity(node,
1421 fixed_node,
1422 CoalesceKind::kFixedInput,
1423 user->GetLifetimePosition());
1424 }
1425 }
1426 } // for node in prunable_nodes
1427}
1428
1429static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1430 return node->GetOutDegree() < num_regs;
1431}
1432
1433static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1434 return !IsLowDegreeNode(node, num_regs);
1435}
1436
1437void ColoringIteration::PruneInterferenceGraph() {
1438 DCHECK(pruned_nodes_.empty()
1439 && simplify_worklist_.empty()
1440 && freeze_worklist_.empty()
1441 && spill_worklist_.empty());
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001442 // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1443 // and all others as high degree nodes. The distinction is important: low degree nodes are
1444 // guaranteed a color, while high degree nodes are not.
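  // (A node with fewer than num_regs_ neighbors can always be colored: even if every
  // neighbor takes a distinct register, at least one register is left over. For example,
  // with num_regs_ == 4, a node of degree 3 always finds a free register.)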
1445
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001446 // Build worklists. Note that the coalesce worklist has already been
1447 // filled by FindCoalesceOpportunities().
1448 for (InterferenceNode* node : prunable_nodes_) {
1449 DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001450 if (IsLowDegreeNode(node, num_regs_)) {
1451 if (node->GetCoalesceOpportunities().empty()) {
1452 // Simplify Worklist.
1453 node->stage = NodeStage::kSimplifyWorklist;
1454 simplify_worklist_.push_back(node);
1455 } else {
1456 // Freeze Worklist.
1457 node->stage = NodeStage::kFreezeWorklist;
1458 freeze_worklist_.push_back(node);
1459 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001460 } else {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001461 // Spill worklist.
1462 node->stage = NodeStage::kSpillWorklist;
1463 spill_worklist_.push(node);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001464 }
1465 }
1466
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001467 // Prune graph.
1468 // Note that we do not remove a node from its current worklist if it moves to another, so it may
1469  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
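  // The worklists are drained in priority order: simplify, then coalesce, then freeze, then
  // spill, so that cheap, guaranteed-safe decisions are exhausted before any node is
  // considered as a spill candidate.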
1470 while (true) {
1471 if (!simplify_worklist_.empty()) {
1472 // Prune low-degree nodes.
1473 // TODO: pop_back() should work as well, but it didn't; we get a
1474 // failed check while pruning. We should look into this.
1475 InterferenceNode* node = simplify_worklist_.front();
1476 simplify_worklist_.pop_front();
1477 DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1478 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1479 DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1480 PruneNode(node);
1481 } else if (!coalesce_worklist_.empty()) {
1482 // Coalesce.
1483 CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1484 coalesce_worklist_.pop();
1485 if (opportunity->stage == CoalesceStage::kWorklist) {
1486 Coalesce(opportunity);
1487 }
1488 } else if (!freeze_worklist_.empty()) {
1489 // Freeze moves and prune a low-degree move-related node.
1490 InterferenceNode* node = freeze_worklist_.front();
1491 freeze_worklist_.pop_front();
1492 if (node->stage == NodeStage::kFreezeWorklist) {
1493 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1494 DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1495 FreezeMoves(node);
1496 PruneNode(node);
1497 }
1498 } else if (!spill_worklist_.empty()) {
1499 // We spill the lowest-priority node, because pruning a node earlier
1500 // gives it a higher chance of being spilled.
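      // (Nodes pruned early sit at the bottom of pruned_nodes_ and are therefore colored
      // last, when the most neighboring registers have already been handed out.)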
1501 InterferenceNode* node = spill_worklist_.top();
1502 spill_worklist_.pop();
1503 if (node->stage == NodeStage::kSpillWorklist) {
1504 DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1505 FreezeMoves(node);
1506 PruneNode(node);
1507 }
1508 } else {
1509 // Pruning complete.
1510 break;
1511 }
1512 }
1513 DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1514}
1515
1516void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1517 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1518 if (opportunity->stage == CoalesceStage::kActive) {
1519 opportunity->stage = CoalesceStage::kWorklist;
1520 coalesce_worklist_.push(opportunity);
1521 }
1522 }
1523}
1524
1525void ColoringIteration::PruneNode(InterferenceNode* node) {
1526 DCHECK_NE(node->stage, NodeStage::kPruned);
1527 DCHECK(!node->IsPrecolored());
1528 node->stage = NodeStage::kPruned;
1529 pruned_nodes_.push(node);
1530
1531 for (InterferenceNode* adj : node->GetAdjacentNodes()) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001532 DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1533
1534 if (adj->IsPrecolored()) {
1535 // No effect on pre-colored nodes; they're never pruned.
1536 } else {
1537 // Remove the interference.
1538 bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1539 DCHECK(adj->ContainsInterference(node))
1540 << "Missing reflexive interference from non-fixed node";
1541 adj->RemoveInterference(node);
1542
1543 // Handle transitions from high degree to low degree.
1544 if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1545 EnableCoalesceOpportunities(adj);
1546 for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1547 EnableCoalesceOpportunities(adj_adj);
1548 }
1549
1550 DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1551 if (adj->IsMoveRelated()) {
1552 adj->stage = NodeStage::kFreezeWorklist;
1553 freeze_worklist_.push_back(adj);
1554 } else {
1555 adj->stage = NodeStage::kSimplifyWorklist;
1556 simplify_worklist_.push_back(adj);
Andreas Gampe6f61ee52016-08-12 06:33:15 +00001557 }
1558 }
1559 }
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001560 }
1561}
Andreas Gampe6f61ee52016-08-12 06:33:15 +00001562
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001563void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1564 if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1565 DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1566 node->stage = NodeStage::kSimplifyWorklist;
1567 simplify_worklist_.push_back(node);
1568 }
1569}
1570
1571void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1572 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1573 if (opportunity->stage == CoalesceStage::kDefunct) {
1574 // Constrained moves should remain constrained, since they will not be considered
1575 // during last-chance coalescing.
1576 } else {
1577 opportunity->stage = CoalesceStage::kInactive;
Andreas Gampe6f61ee52016-08-12 06:33:15 +00001578 }
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001579 InterferenceNode* other = opportunity->node_a->GetAlias() == node
1580 ? opportunity->node_b->GetAlias()
1581 : opportunity->node_a->GetAlias();
1582 if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1583 DCHECK(IsLowDegreeNode(node, num_regs_));
1584 CheckTransitionFromFreezeWorklist(other);
Matthew Gharrity465ed692016-07-22 08:52:13 -07001585 }
1586 }
Matthew Gharrity465ed692016-07-22 08:52:13 -07001587}
1588
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001589bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1590 InterferenceNode* into) {
1591 if (!into->IsPrecolored()) {
1592 // The uncolored heuristic will cover this case.
1593 return false;
1594 }
1595 if (from->IsPair() || into->IsPair()) {
1596 // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1597 // are currently represented as two single fixed nodes in the graph, and `into` is
1598 // only one of them. (We may lose the implicit connections to the second one in a merge.)
1599 return false;
1600 }
1601
1602 // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1603 // Reasons an adjacent node `adj` can be "ok":
1604 // (1) If `adj` is low degree, interference with `into` will not affect its existing
1605 // colorable guarantee. (Notice that coalescing cannot increase its degree.)
1606 // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1607 // (3) If there's already an interference with `into`, coalescing will not add interferences.
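  // (This is essentially the George coalescing test from iterated register coalescing,
  // applied here only to merges into pre-colored nodes.)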
1608 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1609 if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1610 // Ok.
1611 } else {
1612 return false;
1613 }
1614 }
1615 return true;
1616}
1617
1618bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1619 InterferenceNode* into) {
1620 if (into->IsPrecolored()) {
1621 // The pre-colored heuristic will handle this case.
1622 return false;
1623 }
1624
1625  // Arbitrary cap to improve compile time. Tests show that this has negligible effect
1626 // on generated code.
1627 if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1628 return false;
1629 }
1630
1631 // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1632 // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1633 // pruned from the interference graph in the simplify stage.)
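  // For example, with num_regs_ == 4, a merge whose combined high-degree neighbors have a
  // total edge weight of 3 is safe: once low-degree neighbors are simplified away, at most
  // three registers can be blocked, leaving one for the merged node (the Briggs criterion).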
1634 size_t high_degree_interferences = 0;
1635 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1636 if (IsHighDegreeNode(adj, num_regs_)) {
1637 high_degree_interferences += from->EdgeWeightWith(adj);
1638 }
1639 }
1640 for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1641 if (IsHighDegreeNode(adj, num_regs_)) {
1642 if (from->ContainsInterference(adj)) {
1643 // We've already counted this adjacent node.
1644 // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1645 // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1646 // but remains conservative.)
1647 if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1648 high_degree_interferences -= from->EdgeWeightWith(adj);
1649 }
1650 } else {
1651 high_degree_interferences += into->EdgeWeightWith(adj);
1652 }
1653 }
1654 }
1655
1656 return high_degree_interferences < num_regs_;
1657}
1658
1659void ColoringIteration::Combine(InterferenceNode* from,
1660 InterferenceNode* into) {
1661 from->SetAlias(into);
1662
1663 // Add interferences.
1664 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1665 bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1666 AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1667 if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1668 // This is a (temporary) transition to a high degree node. Its degree will decrease again
1669 // when we prune `from`, but it's best to be consistent about the current worklist.
1670 adj->stage = NodeStage::kSpillWorklist;
1671 spill_worklist_.push(adj);
1672 }
1673 }
1674
1675 // Add coalesce opportunities.
1676 for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1677 if (opportunity->stage != CoalesceStage::kDefunct) {
1678 into->AddCoalesceOpportunity(opportunity);
1679 }
1680 }
1681 EnableCoalesceOpportunities(from);
1682
1683 // Prune and update worklists.
1684 PruneNode(from);
1685 if (IsLowDegreeNode(into, num_regs_)) {
1686 // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1687 DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1688 } else if (into->stage == NodeStage::kFreezeWorklist) {
1689 // This is a transition to a high degree node.
1690 into->stage = NodeStage::kSpillWorklist;
1691 spill_worklist_.push(into);
1692 } else {
1693 DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1694 }
1695}
1696
1697void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1698 InterferenceNode* from = opportunity->node_a->GetAlias();
1699 InterferenceNode* into = opportunity->node_b->GetAlias();
1700 DCHECK_NE(from->stage, NodeStage::kPruned);
1701 DCHECK_NE(into->stage, NodeStage::kPruned);
1702
1703 if (from->IsPrecolored()) {
1704 // If we have one pre-colored node, make sure it's the `into` node.
1705 std::swap(from, into);
1706 }
1707
1708 if (from == into) {
1709 // These nodes have already been coalesced.
1710 opportunity->stage = CoalesceStage::kDefunct;
1711 CheckTransitionFromFreezeWorklist(from);
1712 } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1713 // These nodes interfere.
1714 opportunity->stage = CoalesceStage::kDefunct;
1715 CheckTransitionFromFreezeWorklist(from);
1716 CheckTransitionFromFreezeWorklist(into);
1717 } else if (PrecoloredHeuristic(from, into)
1718 || UncoloredHeuristic(from, into)) {
1719 // We can coalesce these nodes.
1720 opportunity->stage = CoalesceStage::kDefunct;
1721 Combine(from, into);
1722 CheckTransitionFromFreezeWorklist(into);
1723 } else {
1724 // We cannot coalesce, but we may be able to later.
1725 opportunity->stage = CoalesceStage::kActive;
1726 }
1727}
1728
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001729// Build a mask with a bit set for each register assigned to some
1730// interval in `intervals`.
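// For example, if the adjacent nodes currently hold registers 0 and 3 (and no pair
// intervals are involved), the mask is 0b1001, leaving registers 1 and 2 as candidates.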
1731template <typename Container>
1732static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
1733 std::bitset<kMaxNumRegs> conflict_mask;
1734 for (InterferenceNode* adjacent : intervals) {
1735 LiveInterval* conflicting = adjacent->GetInterval();
1736 if (conflicting->HasRegister()) {
1737 conflict_mask.set(conflicting->GetRegister());
1738 if (conflicting->HasHighInterval()) {
1739 DCHECK(conflicting->GetHighInterval()->HasRegister());
1740 conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1741 }
1742 } else {
1743 DCHECK(!conflicting->HasHighInterval()
1744 || !conflicting->GetHighInterval()->HasRegister());
1745 }
1746 }
1747 return conflict_mask;
1748}
1749
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001750bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1751 return processing_core_regs
1752 ? !codegen_->IsCoreCalleeSaveRegister(reg)
1753      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1754}
1755
1756static bool RegisterIsAligned(size_t reg) {
1757 return reg % 2 == 0;
1758}
1759
1760static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1761 // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1762 // Note that CTZ is undefined if all bits are 0, so we special-case it.
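  // For example, a conflict mask of 0b0111 gives CTZ(~0b0111) == 3, i.e. register 3 is the
  // lowest register not in conflict.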
1763 return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1764}
1765
1766bool ColoringIteration::ColorInterferenceGraph() {
1767 DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001768 ArenaVector<LiveInterval*> colored_intervals(
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001769 allocator_->Adapter(kArenaAllocRegisterAllocator));
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001770 bool successful = true;
1771
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001772 while (!pruned_nodes_.empty()) {
1773 InterferenceNode* node = pruned_nodes_.top();
1774 pruned_nodes_.pop();
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001775 LiveInterval* interval = node->GetInterval();
Andreas Gampe6f61ee52016-08-12 06:33:15 +00001776 size_t reg = 0;
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001777
1778 InterferenceNode* alias = node->GetAlias();
1779 if (alias != node) {
1780 // This node was coalesced with another.
1781 LiveInterval* alias_interval = alias->GetInterval();
1782 if (alias_interval->HasRegister()) {
1783 reg = alias_interval->GetRegister();
1784 DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1785 << "This node conflicts with the register it was coalesced with";
1786 } else {
1787 DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1788 << "Move coalescing was not conservative, causing a node to be coalesced "
1789 << "with another node that could not be colored";
1790 if (interval->RequiresRegister()) {
1791 successful = false;
1792 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001793 }
1794 } else {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001795 // Search for free register(s).
1796 std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1797 if (interval->HasHighInterval()) {
1798 // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1799 // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1800 // change the alignment requirements here, we will have to update the algorithm (e.g.,
1801 // be more conservative about the weight of edges adjacent to pair nodes.)
1802 while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1803 reg += 2;
1804 }
1805
1806 // Try to use a caller-save register first.
1807 for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1808 bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
1809 bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1810 if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1811 if (low_caller_save && high_caller_save) {
1812 reg = i;
1813 break;
1814 } else if (low_caller_save || high_caller_save) {
1815 reg = i;
1816 // Keep looking to try to get both parts in caller-save registers.
1817 }
1818 }
1819 }
1820 } else {
1821 // Not a pair interval.
1822 reg = FindFirstZeroInConflictMask(conflict_mask);
1823
1824 // Try to use caller-save registers first.
1825 for (size_t i = 0; i < num_regs_; ++i) {
1826 if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1827 reg = i;
1828 break;
1829 }
1830 }
1831 }
1832
1833 // Last-chance coalescing.
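      // Even though these opportunities were not coalesced conservatively, they can still be
      // honored for free here whenever the partner already holds a register that does not
      // conflict with this node.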
1834 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1835 if (opportunity->stage == CoalesceStage::kDefunct) {
1836 continue;
1837 }
1838 LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1839 ? opportunity->node_b->GetAlias()->GetInterval()
1840 : opportunity->node_a->GetAlias()->GetInterval();
1841 if (other_interval->HasRegister()) {
1842 size_t coalesce_register = other_interval->GetRegister();
1843 if (interval->HasHighInterval()) {
1844 if (!conflict_mask[coalesce_register] &&
1845 !conflict_mask[coalesce_register + 1] &&
1846 RegisterIsAligned(coalesce_register)) {
1847 reg = coalesce_register;
1848 break;
1849 }
1850 } else if (!conflict_mask[coalesce_register]) {
1851 reg = coalesce_register;
1852 break;
1853 }
1854 }
1855 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001856 }
1857
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001858 if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001859 // Assign register.
1860 DCHECK(!interval->HasRegister());
1861 interval->SetRegister(reg);
1862 colored_intervals.push_back(interval);
1863 if (interval->HasHighInterval()) {
1864 DCHECK(!interval->GetHighInterval()->HasRegister());
1865 interval->GetHighInterval()->SetRegister(reg + 1);
1866 colored_intervals.push_back(interval->GetHighInterval());
1867 }
1868 } else if (interval->RequiresRegister()) {
1869 // The interference graph is too dense to color. Make it sparser by
1870 // splitting this live interval.
1871 successful = false;
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001872 register_allocator_->SplitAtRegisterUses(interval);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001873 // We continue coloring, because there may be additional intervals that cannot
1874 // be colored, and that we should split.
1875 } else {
1876 // Spill.
Matthew Gharrityb6722ff2016-08-12 19:07:11 -07001877 node->SetNeedsSpillSlot();
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001878 }
1879 }
1880
1881 // If unsuccessful, reset all register assignments.
1882 if (!successful) {
1883 for (LiveInterval* interval : colored_intervals) {
1884 interval->ClearRegister();
1885 }
1886 }
1887
1888 return successful;
1889}
1890
Matthew Gharrityb6722ff2016-08-12 19:07:11 -07001891void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
1892 // The register allocation resolver will organize the stack based on value type,
1893 // so we assign stack slots for each value type separately.
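  // Slot numbers assigned below are therefore relative to their own category; the resolver
  // later combines the per-type regions into final stack offsets.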
1894 ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1895 ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1896 ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1897 ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1898
1899 // The set of parent intervals already handled.
1900 ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
1901
1902 // Find nodes that need spill slots.
1903 for (InterferenceNode* node : nodes) {
1904 if (!node->NeedsSpillSlot()) {
1905 continue;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001906 }
1907
Matthew Gharrityb6722ff2016-08-12 19:07:11 -07001908 LiveInterval* parent = node->GetInterval()->GetParent();
1909 if (seen.find(parent) != seen.end()) {
1910 // We've already handled this interval.
1911 // This can happen if multiple siblings of the same interval request a stack slot.
1912 continue;
1913 }
1914 seen.insert(parent);
1915
1916 HInstruction* defined_by = parent->GetDefinedBy();
1917 if (parent->HasSpillSlot()) {
1918 // We already have a spill slot for this value that we can reuse.
1919 } else if (defined_by->IsParameterValue()) {
1920 // Parameters already have a stack slot.
1921 parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1922 } else if (defined_by->IsCurrentMethod()) {
1923 // The current method is always at stack slot 0.
1924 parent->SetSpillSlot(0);
1925 } else if (defined_by->IsConstant()) {
1926 // Constants don't need a spill slot.
1927 } else {
1928 // We need to find a spill slot for this interval. Place it in the correct
1929 // worklist to be processed later.
1930 switch (node->GetInterval()->GetType()) {
1931 case Primitive::kPrimDouble:
1932 double_intervals.push_back(parent);
1933 break;
1934 case Primitive::kPrimLong:
1935 long_intervals.push_back(parent);
1936 break;
1937 case Primitive::kPrimFloat:
1938 float_intervals.push_back(parent);
1939 break;
1940 case Primitive::kPrimNot:
1941 case Primitive::kPrimInt:
1942 case Primitive::kPrimChar:
1943 case Primitive::kPrimByte:
1944 case Primitive::kPrimBoolean:
1945 case Primitive::kPrimShort:
1946 int_intervals.push_back(parent);
1947 break;
1948 case Primitive::kPrimVoid:
1949 LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1950 UNREACHABLE();
1951 }
1952 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001953 }
Matthew Gharrityb6722ff2016-08-12 19:07:11 -07001954
1955 // Color spill slots for each value type.
1956 ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
1957 ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
1958 ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
1959 ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
1960}
1961
1962void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
1963 size_t* num_stack_slots_used) {
1964 // We cannot use the original interference graph here because spill slots are assigned to
1965 // all of the siblings of an interval, whereas an interference node represents only a single
1966 // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
1967 // by position, and assigning the lowest spill slot available when we encounter an interval
1968 // beginning. We ignore lifetime holes for simplicity.
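  // For example, parent intervals covering [2, 30) and [10, 18) overlap, so they get slots 0
  // and 1; a third interval starting at 34 can then reuse slot 0, since the first interval
  // has already ended.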
1969 ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
1970 allocator_->Adapter(kArenaAllocRegisterAllocator));
1971
1972 for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
1973 LiveInterval* parent_interval = *it;
1974 DCHECK(parent_interval->IsParent());
1975 DCHECK(!parent_interval->HasSpillSlot());
1976 size_t start = parent_interval->GetStart();
1977 size_t end = parent_interval->GetLastSibling()->GetEnd();
1978 DCHECK_LT(start, end);
1979 interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
1980 interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
1981 }
1982
1983 // Sort by position.
1984 // We explicitly ignore the third entry of each tuple (the interval pointer) in order
1985 // to maintain determinism.
1986 std::sort(interval_endpoints.begin(), interval_endpoints.end(),
1987 [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
1988 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
1989 return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1990 < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1991 });
1992
1993 ArenaBitVector taken(allocator_, 0, true);
1994 for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
1995 // Extract information from the current tuple.
1996 LiveInterval* parent_interval;
1997 bool is_interval_beginning;
1998 size_t position;
1999 std::tie(position, is_interval_beginning, parent_interval) = *it;
2000
2001 bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
2002
2003 if (is_interval_beginning) {
2004 DCHECK(!parent_interval->HasSpillSlot());
2005 DCHECK_EQ(position, parent_interval->GetStart());
2006
2007 // Find a free stack slot.
2008 size_t slot = 0;
2009 for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
2010 // Skip taken slots.
2011 }
2012 parent_interval->SetSpillSlot(slot);
2013
2014 *num_stack_slots_used = std::max(*num_stack_slots_used,
2015                                       needs_two_slots ? slot + 2 : slot + 1);
2016 if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
2017 // The parallel move resolver requires that there be an even number of spill slots
2018 // allocated for pair value types.
2019 ++(*num_stack_slots_used);
2020 }
2021
2022 taken.SetBit(slot);
2023 if (needs_two_slots) {
2024 taken.SetBit(slot + 1);
2025 }
2026 } else {
2027 DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2028 DCHECK(parent_interval->HasSpillSlot());
2029
2030 // Free up the stack slot used by this interval.
2031 size_t slot = parent_interval->GetSpillSlot();
2032 DCHECK(taken.IsBitSet(slot));
2033 DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
2034 taken.ClearBit(slot);
2035 if (needs_two_slots) {
2036 taken.ClearBit(slot + 1);
2037 }
2038 }
2039 }
2040 DCHECK_EQ(taken.NumSetBits(), 0u);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07002041}
2042
2043} // namespace art