/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "register_allocator_graph_color.h"

#include "code_generator.h"
#include "linear_order.h"
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
#include "thread-inl.h"

namespace art {

// Highest number of registers that we support for any platform. This can be used for std::bitset,
// for example, which needs to know its size at compile time.
static constexpr size_t kMaxNumRegs = 32;

// The maximum number of graph coloring attempts before triggering a DCHECK.
// This is meant to catch changes to the graph coloring algorithm that undermine its forward
// progress guarantees. Forward progress for the algorithm means splitting live intervals on
// every graph coloring attempt so that eventually the interference graph will be sparse enough
// to color. The main threat to forward progress is trying to split short intervals which cannot be
// split further; this could cause infinite looping because the interference graph would never
// change. This is avoided by prioritizing short intervals before long ones, so that long
// intervals are split when coloring fails.
static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;

// We always want to avoid spilling inside loops.
static constexpr size_t kLoopSpillWeightMultiplier = 10;

// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;

// We avoid moves in blocks that dominate the exit block, since these blocks will
// be executed on every path through the method.
static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;

enum class CoalesceKind {
  kAdjacentSibling,       // Prevents moves at interval split points.
  kFixedOutputSibling,    // Prevents moves from a fixed output location.
  kFixedInput,            // Prevents moves into a fixed input location.
  kNonlinearControlFlow,  // Prevents moves between blocks.
  kPhi,                   // Prevents phi resolution moves.
  kFirstInput,            // Prevents a single input move.
  kAnyInput,              // May lead to better instruction selection / smaller encodings.
};

std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
}

static size_t LoopDepthAt(HBasicBlock* block) {
  HLoopInformation* loop_info = block->GetLoopInformation();
  size_t depth = 0;
  while (loop_info != nullptr) {
    ++depth;
    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
  }
  return depth;
}

// Return the runtime cost of inserting a move instruction at the specified location.
static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
  DCHECK(block != nullptr);
  size_t cost = 1;
  if (block->IsSingleJump()) {
    cost *= kSingleJumpBlockWeightMultiplier;
  }
  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
    cost *= kDominatesExitBlockWeightMultiplier;
  }
  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
    cost *= kLoopSpillWeightMultiplier;
  }
  return cost;
}
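
// Illustrative example of the cost function above (not part of the original comments): a move in
// a block at loop depth 1 that is neither a single-jump block nor a dominator of the exit block
// costs 1 * 10 = 10, while a move in a single-jump block outside any loop that dominates the exit
// block costs 1 * 2 * 2 = 4.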

// In general, we estimate coalesce priority by whether it will definitely avoid a move,
// and by how likely it is to create an interference graph that's harder to color.
static size_t ComputeCoalescePriority(CoalesceKind kind,
                                      size_t position,
                                      const SsaLivenessAnalysis& liveness) {
  if (kind == CoalesceKind::kAnyInput) {
    // This type of coalescing can affect instruction selection, but not moves, so we
    // give it the lowest priority.
    return 0;
  } else {
    return CostForMoveAt(position, liveness);
  }
}
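
// Illustrative example (not part of the original comments): a kPhi opportunity whose position
// falls in a block at loop depth 1 (and which is neither a single-jump block nor an
// exit-dominating block) gets priority 10, the cost of the move it would avoid, whereas a
// kAnyInput opportunity always gets priority 0.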

enum class CoalesceStage {
  kWorklist,  // Currently in the iterative coalescing worklist.
  kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
  kInactive,  // No longer considered until last-chance coalescing.
  kDefunct,   // Either the two nodes interfere, or have already been coalesced.
};

std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
  return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
}

// Represents a coalesce opportunity between two nodes.
struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
  CoalesceOpportunity(InterferenceNode* a,
                      InterferenceNode* b,
                      CoalesceKind kind,
                      size_t position,
                      const SsaLivenessAnalysis& liveness)
      : node_a(a),
        node_b(b),
        stage(CoalesceStage::kWorklist),
        priority(ComputeCoalescePriority(kind, position, liveness)) {}

  // Compare two coalesce opportunities based on their priority.
  // Return true if lhs has a lower priority than that of rhs.
  static bool CmpPriority(const CoalesceOpportunity* lhs,
                          const CoalesceOpportunity* rhs) {
    return lhs->priority < rhs->priority;
  }

  InterferenceNode* const node_a;
  InterferenceNode* const node_b;

  // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
  // and whether it should still be considered.
  CoalesceStage stage;

  // The priority of this coalesce opportunity, based on heuristics.
  const size_t priority;
};

enum class NodeStage {
  kInitial,           // Uninitialized.
  kPrecolored,        // Marks fixed nodes.
  kSafepoint,         // Marks safepoint nodes.
  kPrunable,          // Marks uncolored nodes in the interference graph.
  kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
  kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
  kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
  kPruned             // Marks nodes already pruned from the interference graph.
};

std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
  return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
}

// Returns the estimated cost of spilling a particular live interval.
static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
  if (interval->HasRegister()) {
    // Intervals with a fixed register cannot be spilled.
    return std::numeric_limits<float>::min();
  }

  size_t length = interval->GetLength();
  if (length == 1) {
    // Tiny intervals should have maximum priority, since they cannot be split any further.
    return std::numeric_limits<float>::max();
  }

  size_t use_weight = 0;
  if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
    // Cost for spilling at a register definition point.
    use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
  }

  UsePosition* use = interval->GetFirstUse();
  while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
    // Skip uses before the start of this live interval.
    use = use->GetNext();
  }

  while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
    if (use->GetUser() != nullptr && use->RequiresRegister()) {
      // Cost for spilling at a register use point.
      use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
    }
    use = use->GetNext();
  }

  // We divide by the length of the interval because we want to prioritize
  // short intervals; we do not benefit much if we split them further.
  return static_cast<float>(use_weight) / static_cast<float>(length);
}
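
// Illustrative example (not part of the original comments): an interval of length 20 whose
// definition requires a register and which has one register-requiring use, with both positions in
// blocks of cost 1, gets a spill weight of (1 + 1) / 20 = 0.1.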

// Interference nodes make up the interference graph, which is the primary data structure in
// graph coloring register allocation. Each node represents a single live interval, and contains
// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
// pre-colored nodes never contain outgoing edges (only incoming ones).
//
// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
//
// Note that a pair interval is represented by a single node in the interference graph, which
// essentially requires two colors. One consequence of this is that the degree of a node is not
// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
// number of colors with which a node could interfere. We model this by giving edges different
// weights (1 or 2) to control how much an edge increases the degree of adjacent nodes.
// For example, the edge between two single nodes will have weight 1. On the other hand,
// the edge between a single node and a pair node will have weight 2. This is because the pair
// node could block up to two colors for the single node, and because the single node could
// block an entire two-register aligned slot for the pair node.
// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
// and thus whether it is safe to prune it from the interference graph early on.
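//
// Illustrative example (not part of the original comments): a single-register node adjacent to
// one other single node and one pair node has degree 1 + 2 = 3, even though it has only two
// neighbors.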
219class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
220 public:
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000221 InterferenceNode(ArenaAllocator* allocator,
222 LiveInterval* interval,
223 const SsaLivenessAnalysis& liveness)
224 : stage(NodeStage::kInitial),
225 interval_(interval),
226 adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
227 coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
228 out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
229 alias_(this),
230 spill_weight_(ComputeSpillWeight(interval, liveness)),
Matthew Gharrityb6722ff2016-08-12 19:07:11 -0700231 requires_color_(interval->RequiresRegister()),
232 needs_spill_slot_(false) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000233 DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700234 }
235
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000236 void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
237 DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
238 DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
239 DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
240 DCHECK_NE(stage, NodeStage::kPruned);
241 DCHECK_NE(other->stage, NodeStage::kPruned);
242 if (guaranteed_not_interfering_yet) {
243 DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
244 == adjacent_nodes_.end());
245 adjacent_nodes_.push_back(other);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700246 out_degree_ += EdgeWeightWith(other);
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000247 } else {
248 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
249 if (it == adjacent_nodes_.end()) {
250 adjacent_nodes_.push_back(other);
251 out_degree_ += EdgeWeightWith(other);
252 }
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700253 }
254 }
255
256 void RemoveInterference(InterferenceNode* other) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000257 DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
258 DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
259 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
260 if (it != adjacent_nodes_.end()) {
261 adjacent_nodes_.erase(it);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700262 out_degree_ -= EdgeWeightWith(other);
263 }
264 }
265
266 bool ContainsInterference(InterferenceNode* other) const {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000267 DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
268 DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
269 auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
270 return it != adjacent_nodes_.end();
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700271 }
272
273 LiveInterval* GetInterval() const {
274 return interval_;
275 }
276
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000277 const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700278 return adjacent_nodes_;
279 }
280
281 size_t GetOutDegree() const {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000282 // Pre-colored nodes have infinite degree.
283 DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700284 return out_degree_;
285 }
286
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000287 void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
288 coalesce_opportunities_.push_back(opportunity);
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700289 }
290
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000291 void ClearCoalesceOpportunities() {
292 coalesce_opportunities_.clear();
293 }
294
295 bool IsMoveRelated() const {
296 for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
297 if (opportunity->stage == CoalesceStage::kWorklist ||
298 opportunity->stage == CoalesceStage::kActive) {
299 return true;
300 }
301 }
302 return false;
303 }
304
305 // Return whether this node already has a color.
306 // Used to find fixed nodes in the interference graph before coloring.
307 bool IsPrecolored() const {
308 return interval_->HasRegister();
309 }
310
311 bool IsPair() const {
312 return interval_->HasHighInterval();
313 }
314
315 void SetAlias(InterferenceNode* rep) {
316 DCHECK_NE(rep->stage, NodeStage::kPruned);
317 DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
318 alias_ = rep;
319 }
320
321 InterferenceNode* GetAlias() {
322 if (alias_ != this) {
323 // Recurse in order to flatten tree of alias pointers.
324 alias_ = alias_->GetAlias();
325 }
326 return alias_;
327 }
328
329 const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
330 return coalesce_opportunities_;
331 }
332
333 float GetSpillWeight() const {
334 return spill_weight_;
335 }
336
337 bool RequiresColor() const {
338 return requires_color_;
339 }
340
Matthew Gharrity465ed692016-07-22 08:52:13 -0700341 // We give extra weight to edges adjacent to pair nodes. See the general comment on the
342 // interference graph above.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000343 size_t EdgeWeightWith(const InterferenceNode* other) const {
344 return (IsPair() || other->IsPair()) ? 2 : 1;
Matthew Gharrity465ed692016-07-22 08:52:13 -0700345 }
346
Matthew Gharrityb6722ff2016-08-12 19:07:11 -0700347 bool NeedsSpillSlot() const {
348 return needs_spill_slot_;
349 }
350
351 void SetNeedsSpillSlot() {
352 needs_spill_slot_ = true;
353 }
354
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000355 // The current stage of this node, indicating which worklist it belongs to.
356 NodeStage stage;
357
358 private:
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700359 // The live interval that this node represents.
360 LiveInterval* const interval_;
361
362 // All nodes interfering with this one.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000363 // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
364 // set sizes that we encounter. Using a vector leads to much better performance.
365 ArenaVector<InterferenceNode*> adjacent_nodes_;
366
367 // Interference nodes that this node should be coalesced with to reduce moves.
368 ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700369
370 // The maximum number of colors with which this node could interfere. This could be more than
371 // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
372 // We use "out" degree because incoming edges come from nodes already pruned from the graph,
373 // and do not affect the coloring of this node.
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000374 // Pre-colored nodes are treated as having infinite degree.
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700375 size_t out_degree_;
376
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000377 // The node representing this node in the interference graph.
378 // Initially set to `this`, and only changed if this node is coalesced into another.
379 InterferenceNode* alias_;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700380
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000381 // The cost of splitting and spilling this interval to the stack.
382 // Nodes with a higher spill weight should be prioritized when assigning registers.
383 // This is essentially based on use density and location; short intervals with many uses inside
384 // deeply nested loops have a high spill weight.
385 const float spill_weight_;
386
387 const bool requires_color_;
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700388
Matthew Gharrityb6722ff2016-08-12 19:07:11 -0700389 bool needs_spill_slot_;
390
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -0700391 DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
392};
393
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000394// The order in which we color nodes is important. To guarantee forward progress,
395// we prioritize intervals that require registers, and after that we prioritize
396// short intervals. That way, if we fail to color a node, it either won't require a
397// register, or it will be a long interval that can be split in order to make the
398// interference graph sparser.
399// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
400// (This metric is secondary to the forward progress requirements above.)
401// TODO: May also want to consider:
402// - Constants (since they can be rematerialized)
403// - Allocated spill slots
404static bool HasGreaterNodePriority(const InterferenceNode* lhs,
405 const InterferenceNode* rhs) {
406 // (1) Prioritize the node that requires a color.
407 if (lhs->RequiresColor() != rhs->RequiresColor()) {
408 return lhs->RequiresColor();
409 }
410
411 // (2) Prioritize the interval that has a higher spill weight.
412 return lhs->GetSpillWeight() > rhs->GetSpillWeight();
413}
414
415// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
416// and provides methods for each phase of the attempt.
417class ColoringIteration {
418 public:
419 ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
420 ArenaAllocator* allocator,
421 bool processing_core_regs,
422 size_t num_regs)
423 : register_allocator_(register_allocator),
424 allocator_(allocator),
425 processing_core_regs_(processing_core_regs),
426 num_regs_(num_regs),
427 interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
428 prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
429 pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
430 simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
431 freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
432 spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
433 coalesce_worklist_(CoalesceOpportunity::CmpPriority,
434 allocator->Adapter(kArenaAllocRegisterAllocator)) {}
435
436 // Use the intervals collected from instructions to construct an
437 // interference graph mapping intervals to adjacency lists.
438 // Also, collect synthesized safepoint nodes, used to keep
439 // track of live intervals across safepoints.
440 // TODO: Should build safepoints elsewhere.
441 void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
Vladimir Marko70e97462016-08-09 11:04:26 +0100442 const ArenaVector<InterferenceNode*>& physical_nodes);
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +0000443
444 // Add coalesce opportunities to interference nodes.
445 void FindCoalesceOpportunities();
446
447 // Prune nodes from the interference graph to be colored later. Build
448 // a stack (pruned_nodes) containing these intervals in an order determined
449 // by various heuristics.
450 void PruneInterferenceGraph();
451
  // Process pruned_nodes_ to color the interference graph, spilling when
  // necessary. Returns true if successful. Else, some intervals have been
  // split, and the interference graph should be rebuilt for another attempt.
  bool ColorInterferenceGraph();

  // Return prunable nodes.
  // The register allocator will need to access prunable nodes after coloring
  // in order to tell the code generator which registers have been assigned.
  const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
    return prunable_nodes_;
  }

 private:
  // Create a coalesce opportunity between two nodes.
  void CreateCoalesceOpportunity(InterferenceNode* a,
                                 InterferenceNode* b,
                                 CoalesceKind kind,
                                 size_t position);

  // Add an edge in the interference graph, if valid.
  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
  // when possible.
  void AddPotentialInterference(InterferenceNode* from,
                                InterferenceNode* to,
                                bool guaranteed_not_interfering_yet,
                                bool both_directions = true);

  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
  // may be pruned from the interference graph.
  void FreezeMoves(InterferenceNode* node);

  // Prune a node from the interference graph, updating worklists if necessary.
  void PruneNode(InterferenceNode* node);

  // Add coalesce opportunities associated with this node to the coalesce worklist.
  void EnableCoalesceOpportunities(InterferenceNode* node);

  // If needed, move `node` from the freeze worklist to the simplify worklist.
  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);

  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  void Coalesce(CoalesceOpportunity* opportunity);

  // Merge `from` into `into` in the interference graph.
  void Combine(InterferenceNode* from, InterferenceNode* into);

  // A reference to the register allocator instance,
  // needed to split intervals and assign spill slots.
  RegisterAllocatorGraphColor* register_allocator_;

  // An arena allocator used for a single graph coloring attempt.
  ArenaAllocator* allocator_;

  const bool processing_core_regs_;

  const size_t num_regs_;

  // A map from live intervals to interference nodes.
  ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;

  // Uncolored nodes that should be pruned from the interference graph.
  ArenaVector<InterferenceNode*> prunable_nodes_;

  // A stack of nodes pruned from the interference graph, waiting to be colored.
  ArenaStdStack<InterferenceNode*> pruned_nodes_;

  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
  ArenaDeque<InterferenceNode*> simplify_worklist_;

  // A queue containing low degree, move-related nodes.
  ArenaDeque<InterferenceNode*> freeze_worklist_;

  // A queue containing high degree nodes.
  // If we have to prune from the spill worklist, we cannot guarantee
  // the pruned node a color, so we order the worklist by priority.
  ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;

  // A queue containing coalesce opportunities.
  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
  // inside of loops) are more important than others.
  ArenaPriorityQueue<CoalesceOpportunity*,
                     decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;

  DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
};

static bool IsCoreInterval(LiveInterval* interval) {
  return !Primitive::IsFloatingPointType(interval->GetType());
}

static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
}
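
// Illustrative example (assuming kVRegSize is 4 bytes, which is not stated in this file): on a
// 64-bit target this reserves 8 / 4 = 2 vreg-sized slots for the ArtMethod*, and on a 32-bit
// target it reserves 1.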

RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
                                                         CodeGenerator* codegen,
                                                         const SsaLivenessAnalysis& liveness,
                                                         bool iterative_move_coalescing)
      : RegisterAllocator(allocator, codegen, liveness),
        iterative_move_coalescing_(iterative_move_coalescing),
        core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        num_int_spill_slots_(0),
        num_double_spill_slots_(0),
        num_float_spill_slots_(0),
        num_long_spill_slots_(0),
        catch_phi_spill_slot_counter_(0),
        reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
        reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
  // Before we ask for blocked registers, set them up in the code generator.
  codegen->SetupBlockedRegisters();

  // Initialize physical core register live intervals and blocked registers.
  // This includes globally blocked registers, such as the stack pointer.
  physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
    physical_core_nodes_[i] =
        new (allocator_) InterferenceNode(allocator_, interval, liveness);
    physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
    core_intervals_.push_back(interval);
    if (codegen_->IsBlockedCoreRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
  // Initialize physical floating point register live intervals and blocked registers.
  physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
    physical_fp_nodes_[i] =
        new (allocator_) InterferenceNode(allocator_, interval, liveness);
    physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
    fp_intervals_.push_back(interval);
    if (codegen_->IsBlockedFloatingPointRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
}

void RegisterAllocatorGraphColor::AllocateRegisters() {
  // (1) Collect and prepare live intervals.
  ProcessInstructions();

  for (bool processing_core_regs : {true, false}) {
    ArenaVector<LiveInterval*>& intervals = processing_core_regs
        ? core_intervals_
        : fp_intervals_;
    size_t num_registers = processing_core_regs
        ? codegen_->GetNumberOfCoreRegisters()
        : codegen_->GetNumberOfFloatingPointRegisters();

    size_t attempt = 0;
    while (true) {
      ++attempt;
      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
          << "Exceeded debug max graph coloring register allocation attempts. "
          << "This could indicate that the register allocator is not making forward progress, "
          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
          << "should be prioritized over long ones, because they cannot be split further.)";

      // Many data structures are cleared between graph coloring attempts, so we reduce
      // total memory usage by using a new arena allocator for each attempt.
      ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
      ColoringIteration iteration(this,
                                  &coloring_attempt_allocator,
                                  processing_core_regs,
                                  num_registers);

      // (2) Build the interference graph. Also gather safepoints.
      ArenaVector<InterferenceNode*> safepoints(
          coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
      ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
          ? physical_core_nodes_
          : physical_fp_nodes_;
      iteration.BuildInterferenceGraph(intervals, physical_nodes);

      // (3) Add coalesce opportunities.
      // If we have tried coloring the graph a suspiciously high number of times, give
      // up on move coalescing, just in case the coalescing heuristics are not conservative.
      // (This situation will be caught if DCHECKs are turned on.)
      if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
        iteration.FindCoalesceOpportunities();
      }

      // (4) Prune all uncolored nodes from interference graph.
      iteration.PruneInterferenceGraph();

      // (5) Color pruned nodes based on interferences.
      bool successful = iteration.ColorInterferenceGraph();

      // We manually clear coalesce opportunities for physical nodes,
      // since they persist across coloring attempts.
      for (InterferenceNode* node : physical_core_nodes_) {
        node->ClearCoalesceOpportunities();
      }
      for (InterferenceNode* node : physical_fp_nodes_) {
        node->ClearCoalesceOpportunities();
      }

      if (successful) {
        // Assign spill slots.
        AllocateSpillSlots(iteration.GetPrunableNodes());

        // Tell the code generator which registers were allocated.
        // We only look at prunable_nodes because we already told the code generator about
        // fixed intervals while processing instructions. We also ignore the fixed intervals
        // placed at the top of catch blocks.
        for (InterferenceNode* node : iteration.GetPrunableNodes()) {
          LiveInterval* interval = node->GetInterval();
          if (interval->HasRegister()) {
            Location low_reg = processing_core_regs
                ? Location::RegisterLocation(interval->GetRegister())
                : Location::FpuRegisterLocation(interval->GetRegister());
            codegen_->AddAllocatedRegister(low_reg);
            if (interval->HasHighInterval()) {
              LiveInterval* high = interval->GetHighInterval();
              DCHECK(high->HasRegister());
              Location high_reg = processing_core_regs
                  ? Location::RegisterLocation(high->GetRegister())
                  : Location::FpuRegisterLocation(high->GetRegister());
              codegen_->AddAllocatedRegister(high_reg);
            }
          } else {
            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
          }
        }

        break;
      }
    }  // while unsuccessful
  }  // for processing_core_regs

  // (6) Resolve locations and deconstruct SSA form.
  RegisterAllocationResolver(allocator_, codegen_, liveness_)
      .Resolve(ArrayRef<HInstruction* const>(safepoints_),
               reserved_art_method_slots_ + reserved_out_slots_,
               num_int_spill_slots_,
               num_long_spill_slots_,
               num_float_spill_slots_,
               num_double_spill_slots_,
               catch_phi_spill_slot_counter_,
               temp_intervals_);

  if (kIsDebugBuild) {
    Validate(/*log_fatal_on_failure*/ true);
  }
}

bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
  for (bool processing_core_regs : {true, false}) {
    ArenaVector<LiveInterval*> intervals(
        allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
      LiveInterval* interval = instruction->GetLiveInterval();
      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
        intervals.push_back(instruction->GetLiveInterval());
      }
    }

    ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
        ? physical_core_nodes_
        : physical_fp_nodes_;
    for (InterferenceNode* fixed : physical_nodes) {
      LiveInterval* interval = fixed->GetInterval();
      if (interval->GetFirstRange() != nullptr) {
        // Ideally we would check fixed ranges as well, but currently there are times when
        // two fixed intervals for the same register will overlap. For example, a fixed input
        // and a fixed output may sometimes share the same register, in which case there will be two
        // fixed intervals for the same place.
      }
    }

    for (LiveInterval* temp : temp_intervals_) {
      if (IsCoreInterval(temp) == processing_core_regs) {
        intervals.push_back(temp);
      }
    }

    size_t spill_slots = num_int_spill_slots_
                         + num_long_spill_slots_
                         + num_float_spill_slots_
                         + num_double_spill_slots_
                         + catch_phi_spill_slot_counter_;
    bool ok = ValidateIntervals(intervals,
                                spill_slots,
                                reserved_art_method_slots_ + reserved_out_slots_,
                                *codegen_,
                                allocator_,
                                processing_core_regs,
                                log_fatal_on_failure);
    if (!ok) {
      return false;
    }
  }  // for processing_core_regs

  return true;
}

void RegisterAllocatorGraphColor::ProcessInstructions() {
  for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
    // Note that we currently depend on this ordering, since some helper
    // code is designed for linear scan register allocation.
    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
         !instr_it.Done();
         instr_it.Advance()) {
      ProcessInstruction(instr_it.Current());
    }

    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      ProcessInstruction(phi_it.Current());
    }

    if (block->IsCatchBlock()
        || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
      // By blocking all registers at the top of each catch block or irreducible loop, we force
      // intervals belonging to the live-in set of the catch/header block to be spilled.
      // TODO(ngeoffray): Phis in this block could be allocated in a register.
      size_t position = block->GetLifetimeStart();
      BlockRegisters(position, position + 1);
    }
  }
}

void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (locations == nullptr) {
    return;
  }
  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
    // We do this here because we do not want the suspend check to artificially
    // create live registers.
    DCHECK(instruction->IsSuspendCheckEntry());
    DCHECK_EQ(locations->GetTempCount(), 0u);
    instruction->GetBlock()->RemoveInstruction(instruction);
    return;
  }

  CheckForTempLiveIntervals(instruction);
  CheckForSafepoint(instruction);
  if (instruction->GetLocations()->WillCall()) {
    // If a call will happen, create fixed intervals for caller-save registers.
    // TODO: Note that it may be beneficial to later split intervals at this point,
    // so that we allow last-minute moves from a caller-save register
    // to a callee-save register.
    BlockRegisters(instruction->GetLifetimePosition(),
                   instruction->GetLifetimePosition() + 1,
                   /*caller_save_only*/ true);
  }
  CheckForFixedInputs(instruction);

  LiveInterval* interval = instruction->GetLiveInterval();
  if (interval == nullptr) {
    // Instructions lacking a valid output location do not have a live interval.
    DCHECK(!locations->Out().IsValid());
    return;
  }

  // Low intervals act as representatives for their corresponding high interval.
  DCHECK(!interval->IsHighInterval());
  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
    interval->AddHighInterval();
  }
  AddSafepointsFor(instruction);
  CheckForFixedOutput(instruction);
  AllocateSpillSlotForCatchPhi(instruction);

  ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
      ? core_intervals_
      : fp_intervals_;
  if (interval->HasSpillSlot() || instruction->IsConstant()) {
    // Note that if an interval already has a spill slot, then its value currently resides
    // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
    // register use. This is also true for constants, which can be materialized at any point.
    size_t first_register_use = interval->FirstRegisterUse();
    if (first_register_use != kNoLifetime) {
      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
      intervals.push_back(split);
    } else {
      // We won't allocate a register for this value.
    }
  } else {
    intervals.push_back(interval);
  }
}

void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
  // We simply block physical registers where necessary.
  // TODO: Ideally we would coalesce the physical register with the register
  // allocated to the input value, but this can be tricky if, e.g., there
  // could be multiple physical register uses of the same value at the
  // same instruction. Furthermore, there's currently no distinction between
  // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
  // may not be clobbered).
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
    Location input = locations->InAt(i);
    if (input.IsRegister() || input.IsFpuRegister()) {
      BlockRegister(input, position, position + 1);
      codegen_->AddAllocatedRegister(input);
    } else if (input.IsPair()) {
      BlockRegister(input.ToLow(), position, position + 1);
      BlockRegister(input.ToHigh(), position, position + 1);
      codegen_->AddAllocatedRegister(input.ToLow());
      codegen_->AddAllocatedRegister(input.ToHigh());
    }
  }
}

void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
  // If an instruction has a fixed output location, we give the live interval a register and then
  // proactively split it just after the definition point to avoid creating too many interferences
  // with a fixed node.
  LiveInterval* interval = instruction->GetLiveInterval();
  Location out = interval->GetDefinedBy()->GetLocations()->Out();
  size_t position = instruction->GetLifetimePosition();
  DCHECK_GE(interval->GetEnd() - position, 2u);

  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
    out = instruction->GetLocations()->InAt(0);
  }

  if (out.IsRegister() || out.IsFpuRegister()) {
    interval->SetRegister(out.reg());
    codegen_->AddAllocatedRegister(out);
    Split(interval, position + 1);
  } else if (out.IsPair()) {
    interval->SetRegister(out.low());
    interval->GetHighInterval()->SetRegister(out.high());
    codegen_->AddAllocatedRegister(out.ToLow());
    codegen_->AddAllocatedRegister(out.ToHigh());
    Split(interval, position + 1);
  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
    interval->SetSpillSlot(out.GetStackIndex());
  } else {
    DCHECK(out.IsUnallocated() || out.IsConstant());
  }
}

void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
  LiveInterval* interval = instruction->GetLiveInterval();
  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
    size_t safepoint_position = safepoint->GetLifetimePosition();

    // Test that safepoints_ are ordered in the optimal way.
    DCHECK(safepoint_index == safepoints_.size() ||
           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);

    if (safepoint_position == interval->GetStart()) {
      // The safepoint is for this instruction, so the location of the instruction
      // does not need to be saved.
      DCHECK_EQ(safepoint_index, safepoints_.size());
      DCHECK_EQ(safepoint, instruction);
      continue;
    } else if (interval->IsDeadAt(safepoint_position)) {
      break;
    } else if (!interval->Covers(safepoint_position)) {
      // Hole in the interval.
      continue;
    }
    interval->AddSafepoint(safepoint);
  }
}

void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
    Location temp = locations->GetTemp(i);
    if (temp.IsRegister() || temp.IsFpuRegister()) {
      BlockRegister(temp, position, position + 1);
      codegen_->AddAllocatedRegister(temp);
    } else {
      DCHECK(temp.IsUnallocated());
      switch (temp.GetPolicy()) {
        case Location::kRequiresRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
          interval->AddTempUse(instruction, i);
          core_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          break;
        }

        case Location::kRequiresFpuRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
          interval->AddTempUse(instruction, i);
          fp_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
            interval->AddHighInterval(/*is_temp*/ true);
            temp_intervals_.push_back(interval->GetHighInterval());
          }
          break;
        }

        default:
          LOG(FATAL) << "Unexpected policy for temporary location "
                     << temp.GetPolicy();
      }
    }
  }
}

void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();

  if (locations->NeedsSafepoint()) {
    safepoints_.push_back(instruction);
  }
}

LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
  if (interval->GetStart() < position && position < interval->GetEnd()) {
    return Split(interval, position);
  } else {
    return interval;
  }
}

void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
  DCHECK(!interval->IsHighInterval());

  // Split just after a register definition.
  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
    interval = TrySplit(interval, interval->GetStart() + 1);
  }

  UsePosition* use = interval->GetFirstUse();
  while (use != nullptr && use->GetPosition() < interval->GetStart()) {
    use = use->GetNext();
  }

  // Split around register uses.
  size_t end = interval->GetEnd();
  while (use != nullptr && use->GetPosition() <= end) {
    if (use->RequiresRegister()) {
      size_t position = use->GetPosition();
      interval = TrySplit(interval, position - 1);
      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
        // If we are at the very end of a basic block, we cannot split right
        // at the use. Split just after instead.
        interval = TrySplit(interval, position + 1);
      } else {
        interval = TrySplit(interval, position);
      }
    }
    use = use->GetNext();
  }
}

void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
    HPhi* phi = instruction->AsPhi();
    LiveInterval* interval = phi->GetLiveInterval();

    HInstruction* previous_phi = phi->GetPrevious();
    DCHECK(previous_phi == nullptr ||
           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
        << "Phis expected to be sorted by vreg number, "
        << "so that equivalent phis are adjacent.";

    if (phi->IsVRegEquivalentOf(previous_phi)) {
      // Assign the same spill slot.
      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
    } else {
      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
    }
  }
}

void RegisterAllocatorGraphColor::BlockRegister(Location location,
                                                size_t start,
                                                size_t end) {
  DCHECK(location.IsRegister() || location.IsFpuRegister());
  int reg = location.reg();
  LiveInterval* interval = location.IsRegister()
      ? physical_core_nodes_[reg]->GetInterval()
      : physical_fp_nodes_[reg]->GetInterval();
  DCHECK(interval->GetRegister() == reg);
  bool blocked_by_codegen = location.IsRegister()
      ? codegen_->IsBlockedCoreRegister(reg)
      : codegen_->IsBlockedFloatingPointRegister(reg);
  if (blocked_by_codegen) {
    // We've already blocked this register for the entire method. (And adding a
    // range inside another range violates the preconditions of AddRange).
  } else {
    interval->AddRange(start, end);
  }
}

void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
      BlockRegister(Location::RegisterLocation(i), start, end);
    }
  }
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
      BlockRegister(Location::FpuRegisterLocation(i), start, end);
    }
  }
}

void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
                                                 InterferenceNode* to,
                                                 bool guaranteed_not_interfering_yet,
                                                 bool both_directions) {
  if (from->IsPrecolored()) {
    // We save space by ignoring outgoing edges from fixed nodes.
  } else if (to->IsPrecolored()) {
    // It is important that only a single node represents a given fixed register in the
    // interference graph. We retrieve that node here.
    const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
        ? register_allocator_->physical_fp_nodes_
        : register_allocator_->physical_core_nodes_;
    InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
    from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
    DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
    DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";

    // If a node interferes with a fixed pair node, the weight of the edge may
    // be inaccurate after using the alias of the pair node, because the alias of the pair node
    // is a singular node.
    // We could make special pair fixed nodes, but that ends up being too conservative because
    // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
    // three rather than two.
    // Instead, we explicitly add an interference with the high node of the fixed pair node.
    // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
    // can be unaligned on x86 complicates things.
    if (to->IsPair()) {
      InterferenceNode* high_node =
          physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
      DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
                high_node->GetInterval()->GetRegister());
      from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
    }
  } else {
    // Standard interference between two uncolored nodes.
    from->AddInterference(to, guaranteed_not_interfering_yet);
  }

  if (both_directions) {
    AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
  }
}

// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
// is allowed to have the same register as the input interval.
// TODO: Ideally we should just produce correct intervals in liveness analysis.
// We would need to refactor the current live interval layout to do so, which is
// no small task.
static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
  LiveInterval* output_interval = out_node->GetInterval();
  HInstruction* defined_by = output_interval->GetDefinedBy();
  if (defined_by == nullptr) {
    // This must not be a definition point.
    return false;
  }

  LocationSummary* locations = defined_by->GetLocations();
  if (locations->OutputCanOverlapWithInputs()) {
    // This instruction does not allow the output to reuse a register from an input.
    return false;
  }

  LiveInterval* input_interval = in_node->GetInterval();
  LiveInterval* next_sibling = input_interval->GetNextSibling();
  size_t def_position = defined_by->GetLifetimePosition();
  size_t use_position = def_position + 1;
  if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
    // The next sibling starts at the use position, so reusing the input register in the output
    // would clobber the input before it's moved into the sibling interval location.
    return false;
  }

  if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
    // The input interval is live after the use position.
    return false;
  }

  HInputsRef inputs = defined_by->GetInputs();
  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
      DCHECK(input_interval->SameRegisterKind(*output_interval));
      return true;
    }
  }

  // The input interval was not an input for this instruction.
  return false;
}

void ColoringIteration::BuildInterferenceGraph(
    const ArenaVector<LiveInterval*>& intervals,
    const ArenaVector<InterferenceNode*>& physical_nodes) {
  DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
  // Build the interference graph efficiently by ordering range endpoints
  // by position and doing a linear sweep to find interferences. (That is, we
  // jump from endpoint to endpoint, maintaining a set of intervals live at each
  // point. If two nodes are ever in the live set at the same time, then they
  // interfere with each other.)
  //
  // We order by both position and (secondarily) by whether the endpoint
  // begins or ends a range; we want to process range endings before range
  // beginnings at the same position because they should not conflict.
  //
  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
  // Tuple contents: (position, is_range_beginning, node).
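  // Illustrative example (not part of the original comments): the endpoint
  // (10, /*is_range_beginning*/ false, a) sorts before (10, /*is_range_beginning*/ true, b), so an
  // interval ending at position 10 is not treated as interfering with one beginning at 10.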
1175 ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001176 allocator_->Adapter(kArenaAllocRegisterAllocator));
1177
1178 // We reserve plenty of space to avoid excessive copying.
1179 range_endpoints.reserve(4 * prunable_nodes_.size());
1180
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001181 for (LiveInterval* parent : intervals) {
1182 for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
1183 LiveRange* range = sibling->GetFirstRange();
1184 if (range != nullptr) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001185 InterferenceNode* node = new (allocator_) InterferenceNode(
1186 allocator_, sibling, register_allocator_->liveness_);
1187 interval_node_map_.Insert(std::make_pair(sibling, node));
1188
Matthew Gharrityd9ffd0d2016-06-22 10:27:55 -07001189 if (sibling->HasRegister()) {
Matthew Gharrity2ccae4a2016-08-12 16:10:45 +00001190 // Fixed nodes should alias the canonical node for the corresponding register.
1191 node->stage = NodeStage::kPrecolored;
1192 InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
1193 node->SetAlias(physical_node);
1194 DCHECK_EQ(node->GetInterval()->GetRegister(),
1195 physical_node->GetInterval()->GetRegister());
1196 } else {
1197 node->stage = NodeStage::kPrunable;
1198 prunable_nodes_.push_back(node);
1199 }
1200
1201 while (range != nullptr) {
1202 range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
1203 range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
1204 range = range->GetNext();
1205 }
1206 }
1207 }
1208 }
1209
1210 // Sort the endpoints.
1211 // We explicitly ignore the third entry of each tuple (the node pointer) in order
1212 // to maintain determinism.
1213 std::sort(range_endpoints.begin(), range_endpoints.end(),
1214 [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
1215 const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
1216 return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1217 < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1218 });
1219
1220 // Nodes live at the current position in the linear sweep.
1221 ArenaVector<InterferenceNode*> live(
1222 allocator_->Adapter(kArenaAllocRegisterAllocator));
1223
1224 // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1225 // live set. When we encounter the end of a range, we remove the corresponding node
1226 // from the live set. Nodes interfere if they are in the live set at the same time.
1227 for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1228 bool is_range_beginning;
1229 InterferenceNode* node;
1230 size_t position;
1231 // Extract information from the tuple, including the node this tuple represents.
1232 std::tie(position, is_range_beginning, node) = *it;
1233
1234 if (is_range_beginning) {
1235 bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
1236 for (InterferenceNode* conflicting : live) {
1237 DCHECK_NE(node, conflicting);
1238 if (CheckInputOutputCanOverlap(conflicting, node)) {
1239 // We do not add an interference, because the instruction represented by `node` allows
1240 // its output to share a register with an input, represented here by `conflicting`.
1241 } else {
1242 AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1243 }
1244 }
1245 DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1246 live.push_back(node);
1247 } else {
1248 // End of range.
1249 auto live_it = std::find(live.begin(), live.end(), node);
1250 DCHECK(live_it != live.end());
1251 live.erase(live_it);
1252 }
1253 }
1254 DCHECK(live.empty());
1255}
1256
1257void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1258 InterferenceNode* b,
1259 CoalesceKind kind,
1260 size_t position) {
1261 DCHECK_EQ(a->IsPair(), b->IsPair())
1262 << "Nodes of different memory widths should never be coalesced";
1263 CoalesceOpportunity* opportunity =
1264 new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1265 a->AddCoalesceOpportunity(opportunity);
1266 b->AddCoalesceOpportunity(opportunity);
1267 coalesce_worklist_.push(opportunity);
1268}
1269
1270// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1271// corresponding to an interval. Note that not all intervals are in this map, notably the parents
1272// of constants and stack arguments. (However, these intervals should not be involved in coalesce
1273// opportunities anyway, because they are not going to be in registers.)
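// (Each opportunity found below records the lifetime position where a move would otherwise be
// needed; if the two nodes end up sharing a register, that move can be elided.)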
1274void ColoringIteration::FindCoalesceOpportunities() {
1275 DCHECK(coalesce_worklist_.empty());
1276
1277 for (InterferenceNode* node : prunable_nodes_) {
1278 LiveInterval* interval = node->GetInterval();
1279
1280 // Coalesce siblings.
1281 LiveInterval* next_sibling = interval->GetNextSibling();
1282 if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1283 auto it = interval_node_map_.Find(next_sibling);
1284 if (it != interval_node_map_.end()) {
1285 InterferenceNode* sibling_node = it->second;
1286 CreateCoalesceOpportunity(node,
1287 sibling_node,
1288 CoalesceKind::kAdjacentSibling,
1289 interval->GetEnd());
1290 }
1291 }
1292
1293 // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1294 LiveInterval* parent = interval->GetParent();
1295 if (parent->HasRegister()
1296 && parent->GetNextSibling() == interval
1297 && parent->GetEnd() == interval->GetStart()) {
1298 auto it = interval_node_map_.Find(parent);
1299 if (it != interval_node_map_.end()) {
1300 InterferenceNode* parent_node = it->second;
1301 CreateCoalesceOpportunity(node,
1302 parent_node,
1303 CoalesceKind::kFixedOutputSibling,
1304 parent->GetEnd());
1305 }
1306 }
1307
1308 // Try to prevent moves across blocks.
1309 // Note that this rarely leads to successful coalesce attempts, so it could be removed
1310 // if it is found to add to compile time.
1311 const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1312 if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1313 // If the start of this interval is at a block boundary, we look at the
1314 // location of the interval in blocks preceding the block this interval
1315 // starts at. This can avoid a move between the two blocks.
1316 HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1317 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1318 size_t position = predecessor->GetLifetimeEnd() - 1;
1319 LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1320 if (existing != nullptr) {
1321 auto it = interval_node_map_.Find(existing);
1322 if (it != interval_node_map_.end()) {
1323 InterferenceNode* existing_node = it->second;
1324 CreateCoalesceOpportunity(node,
1325 existing_node,
1326 CoalesceKind::kNonlinearControlFlow,
1327 position);
1328 }
1329 }
1330 }
1331 }
1332
1333 // Coalesce phi inputs with the corresponding output.
1334 HInstruction* defined_by = interval->GetDefinedBy();
1335 if (defined_by != nullptr && defined_by->IsPhi()) {
1336 const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
1337 HInputsRef inputs = defined_by->GetInputs();
1338
1339 for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1340 // We want the sibling at the end of the appropriate predecessor block.
1341 size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1342 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1343
1344 auto it = interval_node_map_.Find(input_interval);
1345 if (it != interval_node_map_.end()) {
1346 InterferenceNode* input_node = it->second;
1347 CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1348 }
1349 }
1350 }
1351
1352 // Coalesce output with first input when policy is kSameAsFirstInput.
1353 if (defined_by != nullptr) {
1354 Location out = defined_by->GetLocations()->Out();
1355 if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1356 LiveInterval* input_interval
1357 = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1358 // TODO: Could we consider lifetime holes here?
1359 if (input_interval->GetEnd() == interval->GetStart()) {
1360 auto it = interval_node_map_.Find(input_interval);
1361 if (it != interval_node_map_.end()) {
1362 InterferenceNode* input_node = it->second;
1363 CreateCoalesceOpportunity(node,
1364 input_node,
1365 CoalesceKind::kFirstInput,
1366 interval->GetStart());
1367 }
1368 }
1369 }
1370 }
1371
1372 // An interval that starts at an instruction's definition (that is, it is not split) may
1373 // reuse the registers used by the inputs of that instruction, based on the
1374 // location summary.
1375 if (defined_by != nullptr) {
1376 DCHECK(!interval->IsSplit());
1377 LocationSummary* locations = defined_by->GetLocations();
1378 if (!locations->OutputCanOverlapWithInputs()) {
1379 HInputsRef inputs = defined_by->GetInputs();
1380 for (size_t i = 0; i < inputs.size(); ++i) {
1381 size_t def_point = defined_by->GetLifetimePosition();
1382 // TODO: Getting the sibling at the def_point might not be quite what we want
1383 // for fixed inputs, since the use will be *at* the def_point rather than after.
1384 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1385 if (input_interval != nullptr &&
1386 input_interval->HasHighInterval() == interval->HasHighInterval()) {
1387 auto it = interval_node_map_.Find(input_interval);
1388 if (it != interval_node_map_.end()) {
1389 InterferenceNode* input_node = it->second;
1390 CreateCoalesceOpportunity(node,
1391 input_node,
1392 CoalesceKind::kAnyInput,
1393 interval->GetStart());
1394 }
1395 }
1396 }
1397 }
1398 }
1399
1400 // Try to prevent moves into fixed input locations.
1401 UsePosition* use = interval->GetFirstUse();
1402 for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
1403 // Skip past uses before the start of this interval.
1404 }
1405 for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
1406 HInstruction* user = use->GetUser();
1407 if (user == nullptr) {
1408 // User may be null for certain intervals, such as temp intervals.
1409 continue;
1410 }
1411 LocationSummary* locations = user->GetLocations();
1412 Location input = locations->InAt(use->GetInputIndex());
1413 if (input.IsRegister() || input.IsFpuRegister()) {
1414 // TODO: Could try to handle pair intervals too, but coalescing with fixed pair nodes
1415 // is currently not supported.
1416 InterferenceNode* fixed_node = input.IsRegister()
1417 ? register_allocator_->physical_core_nodes_[input.reg()]
1418 : register_allocator_->physical_fp_nodes_[input.reg()];
1419 CreateCoalesceOpportunity(node,
1420 fixed_node,
1421 CoalesceKind::kFixedInput,
1422 user->GetLifetimePosition());
1423 }
1424 }
1425 } // for node in prunable_nodes
1426}
1427
1428static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1429 return node->GetOutDegree() < num_regs;
1430}
1431
1432static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1433 return !IsLowDegreeNode(node, num_regs);
1434}
1435
1436void ColoringIteration::PruneInterferenceGraph() {
1437 DCHECK(pruned_nodes_.empty()
1438 && simplify_worklist_.empty()
1439 && freeze_worklist_.empty()
1440 && spill_worklist_.empty());
1441 // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1442 // and all others as high degree nodes. The distinction is important: low degree nodes are
1443 // guaranteed a color, while high degree nodes are not.
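  // (A node whose degree is less than num_regs can always be assigned some register once all of
  // its neighbors have been colored, which is why pruning low degree nodes first is safe.)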
1444
1445 // Build worklists. Note that the coalesce worklist has already been
1446 // filled by FindCoalesceOpportunities().
1447 for (InterferenceNode* node : prunable_nodes_) {
1448 DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
1449 if (IsLowDegreeNode(node, num_regs_)) {
1450 if (node->GetCoalesceOpportunities().empty()) {
1451 // Simplify Worklist.
1452 node->stage = NodeStage::kSimplifyWorklist;
1453 simplify_worklist_.push_back(node);
1454 } else {
1455 // Freeze Worklist.
1456 node->stage = NodeStage::kFreezeWorklist;
1457 freeze_worklist_.push_back(node);
1458 }
1459 } else {
1460 // Spill worklist.
1461 node->stage = NodeStage::kSpillWorklist;
1462 spill_worklist_.push(node);
1463 }
1464 }
1465
1466 // Prune graph.
1467 // Note that we do not remove a node from its current worklist if it moves to another, so it may
1468 // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
1469 while (true) {
1470 if (!simplify_worklist_.empty()) {
1471 // Prune low-degree nodes.
1472 // TODO: pop_back() should work as well, but it didn't; we get a
1473 // failed check while pruning. We should look into this.
1474 InterferenceNode* node = simplify_worklist_.front();
1475 simplify_worklist_.pop_front();
1476 DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1477 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1478 DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1479 PruneNode(node);
1480 } else if (!coalesce_worklist_.empty()) {
1481 // Coalesce.
1482 CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1483 coalesce_worklist_.pop();
1484 if (opportunity->stage == CoalesceStage::kWorklist) {
1485 Coalesce(opportunity);
1486 }
1487 } else if (!freeze_worklist_.empty()) {
1488 // Freeze moves and prune a low-degree move-related node.
1489 InterferenceNode* node = freeze_worklist_.front();
1490 freeze_worklist_.pop_front();
1491 if (node->stage == NodeStage::kFreezeWorklist) {
1492 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1493 DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1494 FreezeMoves(node);
1495 PruneNode(node);
1496 }
1497 } else if (!spill_worklist_.empty()) {
1498 // We spill the lowest-priority node, because pruning a node earlier
1499 // gives it a higher chance of being spilled.
1500 InterferenceNode* node = spill_worklist_.top();
1501 spill_worklist_.pop();
1502 if (node->stage == NodeStage::kSpillWorklist) {
1503 DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1504 FreezeMoves(node);
1505 PruneNode(node);
1506 }
1507 } else {
1508 // Pruning complete.
1509 break;
1510 }
1511 }
1512 DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1513}
1514
1515void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1516 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1517 if (opportunity->stage == CoalesceStage::kActive) {
1518 opportunity->stage = CoalesceStage::kWorklist;
1519 coalesce_worklist_.push(opportunity);
1520 }
1521 }
1522}
1523
1524void ColoringIteration::PruneNode(InterferenceNode* node) {
1525 DCHECK_NE(node->stage, NodeStage::kPruned);
1526 DCHECK(!node->IsPrecolored());
1527 node->stage = NodeStage::kPruned;
1528 pruned_nodes_.push(node);
1529
1530 for (InterferenceNode* adj : node->GetAdjacentNodes()) {
1531 DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1532
1533 if (adj->IsPrecolored()) {
1534 // No effect on pre-colored nodes; they're never pruned.
1535 } else {
1536 // Remove the interference.
1537 bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1538 DCHECK(adj->ContainsInterference(node))
1539 << "Missing reflexive interference from non-fixed node";
1540 adj->RemoveInterference(node);
1541
1542 // Handle transitions from high degree to low degree.
1543 if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1544 EnableCoalesceOpportunities(adj);
1545 for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1546 EnableCoalesceOpportunities(adj_adj);
1547 }
1548
1549 DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1550 if (adj->IsMoveRelated()) {
1551 adj->stage = NodeStage::kFreezeWorklist;
1552 freeze_worklist_.push_back(adj);
1553 } else {
1554 adj->stage = NodeStage::kSimplifyWorklist;
1555 simplify_worklist_.push_back(adj);
1556 }
1557 }
1558 }
1559 }
1560}
1561
1562void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1563 if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1564 DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1565 node->stage = NodeStage::kSimplifyWorklist;
1566 simplify_worklist_.push_back(node);
1567 }
1568}
1569
1570void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1571 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1572 if (opportunity->stage == CoalesceStage::kDefunct) {
1573 // Constrained moves should remain constrained, since they will not be considered
1574 // during last-chance coalescing.
1575 } else {
1576 opportunity->stage = CoalesceStage::kInactive;
1577 }
1578 InterferenceNode* other = opportunity->node_a->GetAlias() == node
1579 ? opportunity->node_b->GetAlias()
1580 : opportunity->node_a->GetAlias();
1581 if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1582 DCHECK(IsLowDegreeNode(node, num_regs_));
1583 CheckTransitionFromFreezeWorklist(other);
1584 }
1585 }
1586}
1587
1588bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1589 InterferenceNode* into) {
1590 if (!into->IsPrecolored()) {
1591 // The uncolored heuristic will cover this case.
1592 return false;
1593 }
1594 if (from->IsPair() || into->IsPair()) {
1595 // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1596 // are currently represented as two single fixed nodes in the graph, and `into` is
1597 // only one of them. (We may lose the implicit connections to the second one in a merge.)
1598 return false;
1599 }
1600
1601 // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1602 // Reasons an adjacent node `adj` can be "ok":
1603 // (1) If `adj` is low degree, interference with `into` will not affect its existing
1604 // colorable guarantee. (Notice that coalescing cannot increase its degree.)
1605 // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1606 // (3) If there's already an interference with `into`, coalescing will not add interferences.
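  // (This check is essentially George's conservative coalescing test from iterated register
  // coalescing, applied here only when merging into a pre-colored node.)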
1607 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1608 if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1609 // Ok.
1610 } else {
1611 return false;
1612 }
1613 }
1614 return true;
1615}
1616
1617bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1618 InterferenceNode* into) {
1619 if (into->IsPrecolored()) {
1620 // The pre-colored heuristic will handle this case.
1621 return false;
1622 }
1623
1624 // Arbitrary cap to improve compile time. Tests show that this has negligible effect
1625 // on generated code.
1626 if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1627 return false;
1628 }
1629
1630 // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1631 // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1632 // pruned from the interference graph in the simplify stage.)
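  // (This is the conservative Briggs test: the merge is treated as safe when the combined node
  // would have fewer than num_regs neighbors of high degree.)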
1633 size_t high_degree_interferences = 0;
1634 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1635 if (IsHighDegreeNode(adj, num_regs_)) {
1636 high_degree_interferences += from->EdgeWeightWith(adj);
1637 }
1638 }
1639 for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1640 if (IsHighDegreeNode(adj, num_regs_)) {
1641 if (from->ContainsInterference(adj)) {
1642 // We've already counted this adjacent node.
1643 // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1644 // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1645 // but remains conservative.)
1646 if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1647 high_degree_interferences -= from->EdgeWeightWith(adj);
1648 }
1649 } else {
1650 high_degree_interferences += into->EdgeWeightWith(adj);
1651 }
1652 }
1653 }
1654
1655 return high_degree_interferences < num_regs_;
1656}
1657
1658void ColoringIteration::Combine(InterferenceNode* from,
1659 InterferenceNode* into) {
1660 from->SetAlias(into);
1661
1662 // Add interferences.
1663 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1664 bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1665 AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1666 if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1667 // This is a (temporary) transition to a high degree node. Its degree will decrease again
1668 // when we prune `from`, but it's best to be consistent about the current worklist.
1669 adj->stage = NodeStage::kSpillWorklist;
1670 spill_worklist_.push(adj);
1671 }
1672 }
1673
1674 // Add coalesce opportunities.
1675 for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1676 if (opportunity->stage != CoalesceStage::kDefunct) {
1677 into->AddCoalesceOpportunity(opportunity);
1678 }
1679 }
1680 EnableCoalesceOpportunities(from);
1681
1682 // Prune and update worklists.
1683 PruneNode(from);
1684 if (IsLowDegreeNode(into, num_regs_)) {
1685 // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1686 DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1687 } else if (into->stage == NodeStage::kFreezeWorklist) {
1688 // This is a transition to a high degree node.
1689 into->stage = NodeStage::kSpillWorklist;
1690 spill_worklist_.push(into);
1691 } else {
1692 DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1693 }
1694}
1695
1696void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1697 InterferenceNode* from = opportunity->node_a->GetAlias();
1698 InterferenceNode* into = opportunity->node_b->GetAlias();
1699 DCHECK_NE(from->stage, NodeStage::kPruned);
1700 DCHECK_NE(into->stage, NodeStage::kPruned);
1701
1702 if (from->IsPrecolored()) {
1703 // If we have one pre-colored node, make sure it's the `into` node.
1704 std::swap(from, into);
1705 }
1706
1707 if (from == into) {
1708 // These nodes have already been coalesced.
1709 opportunity->stage = CoalesceStage::kDefunct;
1710 CheckTransitionFromFreezeWorklist(from);
1711 } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1712 // These nodes interfere.
1713 opportunity->stage = CoalesceStage::kDefunct;
1714 CheckTransitionFromFreezeWorklist(from);
1715 CheckTransitionFromFreezeWorklist(into);
1716 } else if (PrecoloredHeuristic(from, into)
1717 || UncoloredHeuristic(from, into)) {
1718 // We can coalesce these nodes.
1719 opportunity->stage = CoalesceStage::kDefunct;
1720 Combine(from, into);
1721 CheckTransitionFromFreezeWorklist(into);
1722 } else {
1723 // We cannot coalesce, but we may be able to later.
1724 opportunity->stage = CoalesceStage::kActive;
1725 }
1726}
1727
1728// Build a mask with a bit set for each register assigned to some
1729// interval in `intervals`.
1730template <typename Container>
1731static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
1732 std::bitset<kMaxNumRegs> conflict_mask;
1733 for (InterferenceNode* adjacent : intervals) {
1734 LiveInterval* conflicting = adjacent->GetInterval();
1735 if (conflicting->HasRegister()) {
1736 conflict_mask.set(conflicting->GetRegister());
1737 if (conflicting->HasHighInterval()) {
1738 DCHECK(conflicting->GetHighInterval()->HasRegister());
1739 conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1740 }
1741 } else {
1742 DCHECK(!conflicting->HasHighInterval()
1743 || !conflicting->GetHighInterval()->HasRegister());
1744 }
1745 }
1746 return conflict_mask;
1747}
1748
1749bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1750 return processing_core_regs
1751 ? !codegen_->IsCoreCalleeSaveRegister(reg)
1752 : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1753}
1754
1755static bool RegisterIsAligned(size_t reg) {
1756 return reg % 2 == 0;
1757}
1758
1759static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1760 // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1761 // Note that CTZ is undefined if all bits are 0, so we special-case it.
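  // For example (illustrative): if conflict_mask is 0b0111, registers 0-2 are taken, ~mask ends
  // in ...1000, and CTZ returns 3, the lowest free register.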
1762 return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1763}
1764
1765bool ColoringIteration::ColorInterferenceGraph() {
1766 DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
1767 ArenaVector<LiveInterval*> colored_intervals(
1768 allocator_->Adapter(kArenaAllocRegisterAllocator));
1769 bool successful = true;
1770
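  // Nodes are popped in reverse prune order: the neighbors a node still had when it was pruned are
  // either pre-colored or popped (and thus assigned registers) before it, which is what makes
  // pruning low degree nodes safe.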
1771 while (!pruned_nodes_.empty()) {
1772 InterferenceNode* node = pruned_nodes_.top();
1773 pruned_nodes_.pop();
1774 LiveInterval* interval = node->GetInterval();
1775 size_t reg = 0;
1776
1777 InterferenceNode* alias = node->GetAlias();
1778 if (alias != node) {
1779 // This node was coalesced with another.
1780 LiveInterval* alias_interval = alias->GetInterval();
1781 if (alias_interval->HasRegister()) {
1782 reg = alias_interval->GetRegister();
1783 DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1784 << "This node conflicts with the register it was coalesced with";
1785 } else {
1786 DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1787 << "Move coalescing was not conservative, causing a node to be coalesced "
1788 << "with another node that could not be colored";
1789 if (interval->RequiresRegister()) {
1790 successful = false;
1791 }
1792 }
1793 } else {
1794 // Search for free register(s).
1795 std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1796 if (interval->HasHighInterval()) {
1797 // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1798 // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1799 // change the alignment requirements here, we will have to update the algorithm (e.g.,
1800 // be more conservative about the weight of edges adjacent to pair nodes.)
1801 while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1802 reg += 2;
1803 }
1804
1805 // Try to use a caller-save register first.
1806 for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1807 bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
1808 bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1809 if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1810 if (low_caller_save && high_caller_save) {
1811 reg = i;
1812 break;
1813 } else if (low_caller_save || high_caller_save) {
1814 reg = i;
1815 // Keep looking to try to get both parts in caller-save registers.
1816 }
1817 }
1818 }
1819 } else {
1820 // Not a pair interval.
1821 reg = FindFirstZeroInConflictMask(conflict_mask);
1822
1823 // Try to use caller-save registers first.
1824 for (size_t i = 0; i < num_regs_; ++i) {
1825 if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1826 reg = i;
1827 break;
1828 }
1829 }
1830 }
1831
1832 // Last-chance coalescing.
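  // (Sometimes called biased coloring: if a coalesce partner has already received a register and
  // that register is free for this node, prefer it so the corresponding move can be removed.)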
1833 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1834 if (opportunity->stage == CoalesceStage::kDefunct) {
1835 continue;
1836 }
1837 LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1838 ? opportunity->node_b->GetAlias()->GetInterval()
1839 : opportunity->node_a->GetAlias()->GetInterval();
1840 if (other_interval->HasRegister()) {
1841 size_t coalesce_register = other_interval->GetRegister();
1842 if (interval->HasHighInterval()) {
1843 if (!conflict_mask[coalesce_register] &&
1844 !conflict_mask[coalesce_register + 1] &&
1845 RegisterIsAligned(coalesce_register)) {
1846 reg = coalesce_register;
1847 break;
1848 }
1849 } else if (!conflict_mask[coalesce_register]) {
1850 reg = coalesce_register;
1851 break;
1852 }
1853 }
1854 }
1855 }
1856
1857 if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
1858 // Assign register.
1859 DCHECK(!interval->HasRegister());
1860 interval->SetRegister(reg);
1861 colored_intervals.push_back(interval);
1862 if (interval->HasHighInterval()) {
1863 DCHECK(!interval->GetHighInterval()->HasRegister());
1864 interval->GetHighInterval()->SetRegister(reg + 1);
1865 colored_intervals.push_back(interval->GetHighInterval());
1866 }
1867 } else if (interval->RequiresRegister()) {
1868 // The interference graph is too dense to color. Make it sparser by
1869 // splitting this live interval.
1870 successful = false;
1871 register_allocator_->SplitAtRegisterUses(interval);
1872 // We continue coloring, because there may be additional intervals that cannot
1873 // be colored, and that we should split.
1874 } else {
1875 // Spill.
1876 node->SetNeedsSpillSlot();
1877 }
1878 }
1879
1880 // If unsuccessful, reset all register assignments.
1881 if (!successful) {
1882 for (LiveInterval* interval : colored_intervals) {
1883 interval->ClearRegister();
1884 }
1885 }
1886
1887 return successful;
1888}
1889
1890void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
1891 // The register allocation resolver will organize the stack based on value type,
1892 // so we assign stack slots for each value type separately.
1893 ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1894 ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1895 ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1896 ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1897
1898 // The set of parent intervals already handled.
1899 ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
1900
1901 // Find nodes that need spill slots.
1902 for (InterferenceNode* node : nodes) {
1903 if (!node->NeedsSpillSlot()) {
1904 continue;
1905 }
1906
1907 LiveInterval* parent = node->GetInterval()->GetParent();
1908 if (seen.find(parent) != seen.end()) {
1909 // We've already handled this interval.
1910 // This can happen if multiple siblings of the same interval request a stack slot.
1911 continue;
1912 }
1913 seen.insert(parent);
1914
1915 HInstruction* defined_by = parent->GetDefinedBy();
1916 if (parent->HasSpillSlot()) {
1917 // We already have a spill slot for this value that we can reuse.
1918 } else if (defined_by->IsParameterValue()) {
1919 // Parameters already have a stack slot.
1920 parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1921 } else if (defined_by->IsCurrentMethod()) {
1922 // The current method is always at stack slot 0.
1923 parent->SetSpillSlot(0);
1924 } else if (defined_by->IsConstant()) {
1925 // Constants don't need a spill slot.
1926 } else {
1927 // We need to find a spill slot for this interval. Place it in the correct
1928 // worklist to be processed later.
1929 switch (node->GetInterval()->GetType()) {
1930 case Primitive::kPrimDouble:
1931 double_intervals.push_back(parent);
1932 break;
1933 case Primitive::kPrimLong:
1934 long_intervals.push_back(parent);
1935 break;
1936 case Primitive::kPrimFloat:
1937 float_intervals.push_back(parent);
1938 break;
1939 case Primitive::kPrimNot:
1940 case Primitive::kPrimInt:
1941 case Primitive::kPrimChar:
1942 case Primitive::kPrimByte:
1943 case Primitive::kPrimBoolean:
1944 case Primitive::kPrimShort:
1945 int_intervals.push_back(parent);
1946 break;
1947 case Primitive::kPrimVoid:
1948 LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1949 UNREACHABLE();
1950 }
1951 }
1952 }
1953
1954 // Color spill slots for each value type.
1955 ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
1956 ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
1957 ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
1958 ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
1959}
1960
1961void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
1962 size_t* num_stack_slots_used) {
1963 // We cannot use the original interference graph here because spill slots are assigned to
1964 // all of the siblings of an interval, whereas an interference node represents only a single
1965 // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
1966 // by position, and assigning the lowest spill slot available when we encounter an interval
1967 // beginning. We ignore lifetime holes for simplicity.
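  // (This is the same endpoint-sweep idea used when building the interference graph, applied here
  // to whole parent intervals because all siblings of an interval share one spill slot.)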
1968 ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
1969 allocator_->Adapter(kArenaAllocRegisterAllocator));
1970
1971 for (LiveInterval* parent_interval : *intervals) {
1972 DCHECK(parent_interval->IsParent());
1973 DCHECK(!parent_interval->HasSpillSlot());
1974 size_t start = parent_interval->GetStart();
1975 size_t end = parent_interval->GetLastSibling()->GetEnd();
1976 DCHECK_LT(start, end);
1977 interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
1978 interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
1979 }
1980
1981 // Sort by position.
1982 // We explicitly ignore the third entry of each tuple (the interval pointer) in order
1983 // to maintain determinism.
1984 std::sort(interval_endpoints.begin(), interval_endpoints.end(),
1985 [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
1986 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
1987 return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1988 < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1989 });
1990
1991 ArenaBitVector taken(allocator_, 0, true);
1992 for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
1993 // Extract information from the current tuple.
1994 LiveInterval* parent_interval;
1995 bool is_interval_beginning;
1996 size_t position;
1997 std::tie(position, is_interval_beginning, parent_interval) = *it;
1998 size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
1999
2000 if (is_interval_beginning) {
2001 DCHECK(!parent_interval->HasSpillSlot());
2002 DCHECK_EQ(position, parent_interval->GetStart());
2003
2004 // Find first available free stack slot(s).
2005 size_t slot = 0;
2006 for (; ; ++slot) {
2007 bool found = true;
2008 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2009 if (taken.IsBitSet(s)) {
2010 found = false;
2011 break; // failure
2012 }
2013 }
2014 if (found) {
2015 break; // success
2016 }
2017 }
2018
2019 parent_interval->SetSpillSlot(slot);
2020
2021 *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
2022 if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
2023 // The parallel move resolver requires that there be an even number of spill slots
2024 // allocated for pair value types.
2025 ++(*num_stack_slots_used);
2026 }
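  // (Illustrative: a two-slot value placed at slot 3 brings the running total to 5; the check
  // above rounds it up to 6, keeping the total even as the parallel move resolver requires.)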
2027
2028 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2029 taken.SetBit(s);
2030 }
2031 } else {
2032 DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2033 DCHECK(parent_interval->HasSpillSlot());
2034
2035 // Free up the stack slot(s) used by this interval.
2036 size_t slot = parent_interval->GetSpillSlot();
2037 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2038 DCHECK(taken.IsBitSet(s));
2039 taken.ClearBit(s);
2040 }
2041 }
2042 }
2043 DCHECK_EQ(taken.NumSetBits(), 0u);
2044}
2045
2046} // namespace art