// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

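// The ready list is kept sorted by decreasing total latency (see AddNode), so
// the first node whose start cycle has been reached is the best critical-path
// candidate. Returns nullptr if no node is ready at the current cycle.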
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

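// The stress scheduler ignores latencies and picks a random ready node. Any
// node on the ready list has all of its predecessors scheduled, so the result
// is still a valid ordering.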
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

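// Total latency and start cycle are initialized to -1 and only computed later,
// by ComputeTotalLatencies() and Schedule() respectively.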
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

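// Adding a successor also bumps its unscheduled-predecessor count, which keeps
// the successor off the ready list until this node has been scheduled.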
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

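// Expected per-block call sequence (a sketch; the actual driver lives outside
// this file):
//   StartBlock(rpo);
//   AddInstruction(...) / AddTerminator(...) for each instruction;
//   EndBlock(rpo);
// Per-block state must be empty here; it is reset at the end of Schedule().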
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

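// Flush the dependency graph accumulated for this block before closing it.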
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

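// Build the dependency graph for the block: barriers flush the current graph,
// side-effecting instructions, loads, deopts and traps are ordered relative to
// each other, and operand uses are linked to the nodes that define them.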
void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effecting instructions,
      // but independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

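// List scheduling: repeatedly pop the best ready node, emit its instruction,
// and release successors whose last predecessor has now been scheduled. Each
// iteration of the loop advances the cycle count by one.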
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

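// Flags for the architecture-independent opcodes; target-specific opcodes are
// delegated to GetTargetInstructionFlags().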
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchAbortCSAAssert:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

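// graph_ holds nodes in the original instruction order, so every successor
// appears after its predecessor; iterating in reverse guarantees a successor's
// total latency is known before it is needed (checked by the DCHECK below).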
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8