// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

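// Overview: this file implements the architecture-independent part of the
// instruction scheduler, a per-basic-block list scheduler. AddInstruction()
// builds a dependency graph over the instructions of a block, and Schedule()
// re-emits them in an order chosen by one of the scheduling queues below.
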
void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

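// Since AddNode() keeps the ready list sorted by decreasing total latency,
// the first node whose operands are ready is also the one with the longest
// remaining path to the end of the block, i.e. the critical-path-first
// choice.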
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

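// The stress scheduler ignores latencies and cycle counts altogether and
// picks a random node whose predecessors have all been scheduled. Used under
// --turbo-stress-instruction-scheduling, this helps surface missing
// dependency edges that the critical-path-first order might otherwise hide.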
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

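// total_latency_ and start_cycle_ start out as -1: the former is filled in by
// ComputeTotalLatencies() before scheduling, the latter is raised in
// Schedule() as the node's predecessors get scheduled.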
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

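// Rough usage sketch. The scheduler is driven externally, typically by the
// code generation pipeline; the names below are illustrative only:
//
//   InstructionScheduler scheduler(zone, sequence);
//   scheduler.StartBlock(rpo);
//   for (Instruction* instr : block_body) {  // hypothetical iteration
//     scheduler.AddInstruction(instr);
//   }
//   scheduler.AddTerminator(terminator);
//   scheduler.EndBlock(rpo);  // runs Schedule() and emits into |sequence|.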
InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

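// StartBlock() and EndBlock() bracket the instructions of a single basic
// block. The dependency graph is built incrementally by AddInstruction() and
// AddTerminator(); EndBlock() then runs the scheduler over that graph and
// emits the (possibly reordered) instructions into the sequence.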
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction already in the graph.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

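// AddInstruction() appends a node for |instr| to the dependency graph and adds
// edges so that scheduling preserves: the relative order of side-effecting
// instructions and loads, the order of deopts/traps relative to side effects,
// the position of fixed-register parameter markers, and def-use dependencies
// between virtual registers.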
void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effecting instructions,
      // but independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effecting instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

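// Schedule() is a standard list-scheduling loop. A minimal sketch of the
// algorithm, in terms of the graph and queues defined above:
//
//   compute total latencies (longest path from each node to the block end);
//   put all nodes without predecessors on the ready list;
//   while the ready list is not empty:
//     pop the queue's best candidate for the current cycle (may be none);
//     if there is one, emit it and add to the ready list any successor
//       whose predecessors have now all been scheduled;
//     advance the cycle counter.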
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

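// Maps an instruction to the scheduler's coarse dependency classes:
// kNoOpcodeFlags (freely reorderable), kIsLoadOperation (must not move across
// side effects), kHasSideEffect (keeps its order relative to other side
// effects and to loads), and kIsBarrier (forces everything before it to be
// scheduled and is emitted in place).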
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchAbortCSAAssert:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

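// graph_ holds the nodes in the order the instructions were added, and every
// dependency edge points from an earlier node to a later one. Walking the
// graph in reverse therefore visits all successors of a node before the node
// itself, so their total latencies are already known (see the DCHECK below).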
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8