Add runtime reasons for deopt.

Currently intended to help investigate deoptimizations. Also:
1) Log when deoptimization happens (which method and for what reason)
2) Trace when deoptimization happens (to make it visible in systrace)
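
For illustration, the new diagnostics look roughly like this (the method name
and count below are made up; the strings come from GetDeoptimizationKindName(),
the ScopedTrace in artDeoptimizeImpl, and Runtime::DumpDeoptimizations()):

  Deoptimizing void Foo.bar() due to JIT inline cache    <- log, single-frame deopt
  Deoptimization JIT inline cache                        <- systrace section name
  Number of JIT inline cache deoptimizations: 3          <- SIGQUIT / JIT shutdown dump
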
bug:37655083
Test: test-art-host test-art-target
Change-Id: I0c2d87b40db09e8e475cf97a7c784a034c585e97
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d38d5f8..ed630cd 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1550,7 +1550,7 @@
HBasicBlock* block = GetPreHeader(loop, check);
HInstruction* cond =
new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
- InsertDeoptInLoop(loop, block, cond);
+ InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true);
ReplaceInstruction(check, array);
return true;
}
@@ -1616,11 +1616,16 @@
}
/** Inserts a deoptimization test in a loop preheader. */
- void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+ void InsertDeoptInLoop(HLoopInformation* loop,
+ HBasicBlock* block,
+ HInstruction* condition,
+ bool is_null_check = false) {
HInstruction* suspend = loop->GetSuspendCheck();
block->InsertInstructionBefore(condition, block->GetLastInstruction());
+ DeoptimizationKind kind =
+ is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
+ GetGraph()->GetArena(), condition, kind, suspend->GetDexPc());
block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
if (suspend->HasEnvironment()) {
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1633,7 +1638,7 @@
HBasicBlock* block = bounds_check->GetBlock();
block->InsertInstructionBefore(condition, bounds_check);
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
+ GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc());
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
}
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index 048073e..c806dbf 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -203,7 +203,7 @@
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
+ GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc());
pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ebd578c..d037b94 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -585,8 +585,13 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadImmediate(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -2672,7 +2677,10 @@
void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 78b627a..0c198b1 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -587,8 +587,13 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -3693,7 +3698,10 @@
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d65b327..b29f5a9 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -608,8 +608,14 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
+
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
@@ -2704,7 +2710,10 @@
void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 357df97..3a7108b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -493,8 +493,13 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -5199,7 +5204,10 @@
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index a9c4964..f04e384 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -393,8 +393,13 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -3905,7 +3910,10 @@
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1e867dd..cf2d5cb 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -384,8 +384,14 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ x86_codegen->Load32BitValue(
+ calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -1688,7 +1694,10 @@
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f413739..f2ed52b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -397,8 +397,14 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ x86_64_codegen->Load32BitValue(
+ CpuRegister(calling_convention.GetRegisterAt(0)),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -1710,7 +1716,10 @@
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3b681c1..8674e72 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -783,7 +783,7 @@
HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
- graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
+ graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -860,7 +860,9 @@
graph_->GetArena(),
compare,
receiver,
- HDeoptimize::Kind::kInline,
+ Runtime::Current()->IsAotCompiler()
+ ? DeoptimizationKind::kAotInlineCache
+ : DeoptimizationKind::kJitInlineCache,
invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -1147,7 +1149,7 @@
graph_->GetArena(),
compare,
receiver,
- HDeoptimize::Kind::kInline,
+ DeoptimizationKind::kJitSameTarget,
invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f250c1a..a8bfe61 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1422,18 +1422,6 @@
}
}
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
- switch (rhs) {
- case HDeoptimize::Kind::kBCE:
- return os << "bce";
- case HDeoptimize::Kind::kInline:
- return os << "inline";
- default:
- LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
- UNREACHABLE();
- }
-}
-
bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
return this == instruction->GetPreviousDisregardingMoves();
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e40361e..b4da20b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -30,6 +30,7 @@
#include "base/transform_array_ref.h"
#include "dex_file.h"
#include "dex_file_types.h"
+#include "deoptimization_kind.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
#include "handle_scope.h"
@@ -2991,15 +2992,9 @@
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize FINAL : public HVariableInputSizeInstruction {
public:
- enum class Kind {
- kBCE,
- kInline,
- kLast = kInline
- };
-
// Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
// across.
- HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+ HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc)
: HVariableInputSizeInstruction(
SideEffects::All(),
dex_pc,
@@ -3019,7 +3014,7 @@
HDeoptimize(ArenaAllocator* arena,
HInstruction* cond,
HInstruction* guard,
- Kind kind,
+ DeoptimizationKind kind,
uint32_t dex_pc)
: HVariableInputSizeInstruction(
SideEffects::CanTriggerGC(),
@@ -3043,7 +3038,7 @@
bool CanThrow() const OVERRIDE { return true; }
- Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+ DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); }
Primitive::Type GetType() const OVERRIDE {
return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
@@ -3068,18 +3063,17 @@
static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
static constexpr size_t kFieldDeoptimizeKindSize =
- MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+ MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast));
static constexpr size_t kNumberOfDeoptimizePackedBits =
kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
- using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+ using DeoptimizeKindField =
+ BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
-
// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
// The compiled code checks this flag value in a guard before devirtualized call and
// if it's true, starts to do deoptimization.
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index a1016d1..029eb4b 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -190,7 +190,7 @@
HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
block->AddInstruction(ae);
HInstruction* deoptimize =
- new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
+ new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u);
block->AddInstruction(deoptimize);
HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
/* number_of_vregs */ 5,
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a277edf..8b978cc 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1681,8 +1681,8 @@
.extern artDeoptimize
ENTRY art_quick_deoptimize
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
- mov r0, r9 @ Set up args.
- blx artDeoptimize @ artDeoptimize(Thread*)
+ mov r0, r9 @ pass Thread::Current
+ blx artDeoptimize @ (Thread*)
END art_quick_deoptimize
/*
@@ -1691,9 +1691,9 @@
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_EVERYTHING_FRAME r0
- mov r0, r9 @ Set up args.
- blx artDeoptimizeFromCompiledCode @ artDeoptimizeFromCompiledCode(Thread*)
+ SETUP_SAVE_EVERYTHING_FRAME r1
+ mov r1, r9 @ pass Thread::Current
+ blx artDeoptimizeFromCompiledCode @ (DeoptimizationKind, Thread*)
END art_quick_deoptimize_from_compiled_code
/*
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c555126..18015b5 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2219,7 +2219,7 @@
ENTRY art_quick_deoptimize
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
mov x0, xSELF // Pass thread.
- bl artDeoptimize // artDeoptimize(Thread*)
+ bl artDeoptimize // (Thread*)
brk 0
END art_quick_deoptimize
@@ -2230,8 +2230,8 @@
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
- mov x0, xSELF // Pass thread.
- bl artDeoptimizeFromCompiledCode // artDeoptimizeFromCompiledCode(Thread*)
+ mov x1, xSELF // Pass thread.
+ bl artDeoptimizeFromCompiledCode // (DeoptimizationKind, Thread*)
brk 0
END art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 7bbcbf0..e628a9f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1978,8 +1978,7 @@
ENTRY art_quick_deoptimize
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
la $t9, artDeoptimize
- jalr $t9 # artDeoptimize(Thread*)
- # Returns caller method's frame size.
+ jalr $t9 # (Thread*)
move $a0, rSELF # pass Thread::current
END art_quick_deoptimize
@@ -1991,9 +1990,8 @@
ENTRY art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
la $t9, artDeoptimizeFromCompiledCode
- jalr $t9 # artDeoptimizeFromCompiledCode(Thread*)
- # Returns caller method's frame size.
- move $a0, rSELF # pass Thread::current
+ jalr $t9 # (DeoptimizationKind, Thread*)
+ move $a1, rSELF # pass Thread::current
END art_quick_deoptimize_from_compiled_code
/*
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 8f713a1..40bad16 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1913,8 +1913,7 @@
.extern artEnterInterpreterFromDeoptimize
ENTRY art_quick_deoptimize
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
- jal artDeoptimize # artDeoptimize(Thread*, SP)
- # Returns caller method's frame size.
+ jal artDeoptimize # artDeoptimize(Thread*)
move $a0, rSELF # pass Thread::current
END art_quick_deoptimize
@@ -1925,9 +1924,8 @@
.extern artDeoptimizeFromCompiledCode
ENTRY_NO_GP art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
- jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*, SP)
- # Returns caller method's frame size.
- move $a0, rSELF # pass Thread::current
+ jal artDeoptimizeFromCompiledCode # (DeoptimizationKind, Thread*)
+ move $a1, rSELF # pass Thread::current
END art_quick_deoptimize_from_compiled_code
.set push
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 3694c3e..2222f5c 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2028,7 +2028,7 @@
CFI_ADJUST_CFA_OFFSET(12)
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
CFI_ADJUST_CFA_OFFSET(4)
- call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+ call SYMBOL(artDeoptimize) // (Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize
@@ -2038,11 +2038,12 @@
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
- subl LITERAL(12), %esp // Align stack.
- CFI_ADJUST_CFA_OFFSET(12)
+ subl LITERAL(8), %esp // Align stack.
+ CFI_ADJUST_CFA_OFFSET(8)
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
CFI_ADJUST_CFA_OFFSET(4)
- call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*)
+ PUSH eax
+ call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index ad7c2b3..41651d8 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1983,7 +1983,7 @@
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
// Stack should be aligned now.
movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
- call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+ call SYMBOL(artDeoptimize) // (Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize
@@ -1994,8 +1994,8 @@
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
// Stack should be aligned now.
- movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
- call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*)
+ movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread.
+ call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code
diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h
new file mode 100644
index 0000000..14e189c
--- /dev/null
+++ b/runtime/deoptimization_kind.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEOPTIMIZATION_KIND_H_
+#define ART_RUNTIME_DEOPTIMIZATION_KIND_H_
+
+#include "base/logging.h"
+
+namespace art {
+
+enum class DeoptimizationKind {
+ kAotInlineCache = 0,
+ kJitInlineCache,
+ kJitSameTarget,
+ kLoopBoundsBCE,
+ kLoopNullBCE,
+ kBlockBCE,
+ kCHA,
+ kFullFrame,
+ kLast = kFullFrame
+};
+
+inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) {
+ switch (kind) {
+ case DeoptimizationKind::kAotInlineCache: return "AOT inline cache";
+ case DeoptimizationKind::kJitInlineCache: return "JIT inline cache";
+ case DeoptimizationKind::kJitSameTarget: return "JIT same target";
+ case DeoptimizationKind::kLoopBoundsBCE: return "loop bounds check elimination";
+ case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null";
+ case DeoptimizationKind::kBlockBCE: return "block bounds check elimination";
+ case DeoptimizationKind::kCHA: return "class hierarchy analysis";
+ case DeoptimizationKind::kFullFrame: return "full frame";
+ }
+ LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind);
+ UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind);
+
+} // namespace art
+
+#endif // ART_RUNTIME_DEOPTIMIZATION_KIND_H_
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 3820d85..5762e4f 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -16,6 +16,7 @@
#include "base/logging.h"
#include "base/mutex.h"
+#include "base/systrace.h"
#include "callee_save_frame.h"
#include "interpreter/interpreter.h"
#include "obj_ptr-inl.h" // TODO: Find the other include that isn't complete, and clean this up.
@@ -24,8 +25,9 @@
namespace art {
-NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+NO_RETURN static void artDeoptimizeImpl(Thread* self, DeoptimizationKind kind, bool single_frame)
REQUIRES_SHARED(Locks::mutator_lock_) {
+ Runtime::Current()->IncrementDeoptimizationCount(kind);
if (VLOG_IS_ON(deopt)) {
if (single_frame) {
// Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
@@ -38,10 +40,13 @@
self->AssertHasDeoptimizationContext();
QuickExceptionHandler exception_handler(self, true);
- if (single_frame) {
- exception_handler.DeoptimizeSingleFrame();
- } else {
- exception_handler.DeoptimizeStack();
+ {
+ ScopedTrace trace(std::string("Deoptimization ") + GetDeoptimizationKindName(kind));
+ if (single_frame) {
+ exception_handler.DeoptimizeSingleFrame(kind);
+ } else {
+ exception_handler.DeoptimizeStack();
+ }
}
uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
if (exception_handler.IsFullFragmentDone()) {
@@ -57,18 +62,18 @@
extern "C" NO_RETURN void artDeoptimize(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
- artDeoptimizeImpl(self, false);
+ artDeoptimizeImpl(self, DeoptimizationKind::kFullFrame, false);
}
-// This is called directly from compiled code by an HDepptimize.
-extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
+// This is called directly from compiled code by an HDeoptimize.
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self)
REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
// Before deoptimizing to interpreter, we must push the deoptimization context.
JValue return_value;
return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result.
self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
- artDeoptimizeImpl(self, true);
+ artDeoptimizeImpl(self, kind, true);
}
} // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 915f18e..6cd9dc1 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -21,6 +21,7 @@
#include "base/macros.h"
#include "base/mutex.h"
+#include "deoptimization_kind.h"
#include "offsets.h"
#define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e0a2e3c..e2d45ac 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -140,7 +140,7 @@
V(ThrowNullPointer, void, void) \
V(ThrowStackOverflow, void, void*) \
V(ThrowStringBounds, void, int32_t, int32_t) \
- V(Deoptimize, void, void) \
+ V(Deoptimize, void, DeoptimizationKind) \
\
V(A64Load, int64_t, volatile const int64_t *) \
V(A64Store, void, volatile int64_t *, int64_t) \
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 4ca52de..fa287cb 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -17,6 +17,8 @@
#ifndef ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
#define ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
+#include "deoptimization_kind.h"
+
namespace art {
#ifndef BUILDING_LIBART
@@ -77,7 +79,7 @@
}
// Stub to deoptimize from compiled code.
-extern "C" void art_quick_deoptimize_from_compiled_code();
+extern "C" void art_quick_deoptimize_from_compiled_code(DeoptimizationKind);
// The return_pc of instrumentation exit stub.
extern "C" void art_quick_instrumentation_exit();
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index b32b272..1dfb0f6 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -353,6 +353,7 @@
DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted());
if (dump_info_on_shutdown_) {
DumpInfo(LOG_STREAM(INFO));
+ Runtime::Current()->DumpDeoptimizations(LOG_STREAM(INFO));
}
DeleteThreadPool();
if (jit_compiler_handle_ != nullptr) {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index e9a5ae5..81b87f1 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -149,7 +149,6 @@
used_memory_for_code_(0),
number_of_compilations_(0),
number_of_osr_compilations_(0),
- number_of_deoptimizations_(0),
number_of_collections_(0),
histogram_stack_map_memory_use_("Memory used for stack maps", 16),
histogram_code_memory_use_("Memory used for compiled code", 16),
@@ -1416,8 +1415,6 @@
osr_code_map_.erase(it);
}
}
- MutexLock mu(Thread::Current(), lock_);
- number_of_deoptimizations_++;
}
uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
@@ -1456,7 +1453,6 @@
<< "Total number of JIT compilations: " << number_of_compilations_ << "\n"
<< "Total number of JIT compilations for on stack replacement: "
<< number_of_osr_compilations_ << "\n"
- << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n"
<< "Total number of JIT code cache collections: " << number_of_collections_ << std::endl;
histogram_stack_map_memory_use_.PrintMemoryUse(os);
histogram_code_memory_use_.PrintMemoryUse(os);
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index db214e7..612d06b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -384,9 +384,6 @@
// Number of compilations for on-stack-replacement done throughout the lifetime of the JIT.
size_t number_of_osr_compilations_ GUARDED_BY(lock_);
- // Number of deoptimizations done throughout the lifetime of the JIT.
- size_t number_of_deoptimizations_ GUARDED_BY(lock_);
-
// Number of code cache collections done throughout the lifetime of the JIT.
size_t number_of_collections_ GUARDED_BY(lock_);
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index b866941..db10103 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -530,7 +530,7 @@
PrepareForLongJumpToInvokeStubOrInterpreterBridge();
}
-void QuickExceptionHandler::DeoptimizeSingleFrame() {
+void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
DCHECK(is_deoptimization_);
if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
@@ -544,6 +544,10 @@
// Compiled code made an explicit deoptimization.
ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
DCHECK(deopt_method != nullptr);
+ LOG(INFO) << "Deoptimizing "
+ << deopt_method->PrettyMethod()
+ << " due to "
+ << GetDeoptimizationKindName(kind);
if (Runtime::Current()->UseJitCompilation()) {
Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 3ead7db..8090f9b 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -20,6 +20,7 @@
#include "base/logging.h"
#include "base/macros.h"
#include "base/mutex.h"
+#include "deoptimization_kind.h"
#include "stack_reference.h"
namespace art {
@@ -62,7 +63,7 @@
// the result of IsDeoptimizeable().
// - It can be either full-fragment, or partial-fragment deoptimization, depending
// on whether that single frame covers full or partial fragment.
- void DeoptimizeSingleFrame() REQUIRES_SHARED(Locks::mutator_lock_);
+ void DeoptimizeSingleFrame(DeoptimizationKind kind) REQUIRES_SHARED(Locks::mutator_lock_);
void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b1acec6..60fa082 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -262,6 +262,9 @@
std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
interpreter::CheckInterpreterAsmConstants();
callbacks_.reset(new RuntimeCallbacks());
+ for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+ deoptimization_counts_[i] = 0u;
+ }
}
Runtime::~Runtime() {
@@ -1575,6 +1578,23 @@
register_sun_misc_Unsafe(env);
}
+std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind) {
+ os << GetDeoptimizationKindName(kind);
+ return os;
+}
+
+void Runtime::DumpDeoptimizations(std::ostream& os) {
+ for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+ if (deoptimization_counts_[i] != 0) {
+ os << "Number of "
+ << GetDeoptimizationKindName(static_cast<DeoptimizationKind>(i))
+ << " deoptimizations: "
+ << deoptimization_counts_[i]
+ << "\n";
+ }
+ }
+}
+
void Runtime::DumpForSigQuit(std::ostream& os) {
GetClassLinker()->DumpForSigQuit(os);
GetInternTable()->DumpForSigQuit(os);
@@ -1586,6 +1606,7 @@
} else {
os << "Running non JIT\n";
}
+ DumpDeoptimizations(os);
TrackedAllocators::Dump(os);
os << "\n";
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 3ba0f2c..a2505e2 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -29,6 +29,7 @@
#include "arch/instruction_set.h"
#include "base/macros.h"
#include "base/mutex.h"
+#include "deoptimization_kind.h"
#include "dex_file_types.h"
#include "experimental_flags.h"
#include "gc_root.h"
@@ -235,6 +236,7 @@
// Detaches the current native thread from the runtime.
void DetachCurrentThread() REQUIRES(!Locks::mutator_lock_);
+ void DumpDeoptimizations(std::ostream& os);
void DumpForSigQuit(std::ostream& os);
void DumpLockHolders(std::ostream& os);
@@ -682,6 +684,11 @@
dump_gc_performance_on_shutdown_ = value;
}
+ void IncrementDeoptimizationCount(DeoptimizationKind kind) {
+ DCHECK_LE(kind, DeoptimizationKind::kLast);
+ deoptimization_counts_[static_cast<size_t>(kind)]++;
+ }
+
private:
static void InitPlatformSignalHandlers();
@@ -941,6 +948,8 @@
std::unique_ptr<RuntimeCallbacks> callbacks_;
+ std::atomic<uint32_t> deoptimization_counts_[static_cast<uint32_t>(DeoptimizationKind::kLast) + 1];
+
DISALLOW_COPY_AND_ASSIGN(Runtime);
};
std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
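
A minimal standalone sketch (plain C++, outside ART; main() and the sample
increments are made up) of the per-kind counting scheme this patch adds to
Runtime. The counter array is sized kLast + 1 because kLast aliases the last
real enumerator (kFullFrame) and is itself a valid index:

  #include <atomic>
  #include <cstdint>
  #include <iostream>

  enum class DeoptimizationKind {
    kAotInlineCache = 0,
    kJitInlineCache,
    kJitSameTarget,
    kLoopBoundsBCE,
    kLoopNullBCE,
    kBlockBCE,
    kCHA,
    kFullFrame,
    kLast = kFullFrame
  };

  const char* GetDeoptimizationKindName(DeoptimizationKind kind) {
    switch (kind) {
      case DeoptimizationKind::kAotInlineCache: return "AOT inline cache";
      case DeoptimizationKind::kJitInlineCache: return "JIT inline cache";
      case DeoptimizationKind::kJitSameTarget: return "JIT same target";
      case DeoptimizationKind::kLoopBoundsBCE: return "loop bounds check elimination";
      case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null";
      case DeoptimizationKind::kBlockBCE: return "block bounds check elimination";
      case DeoptimizationKind::kCHA: return "class hierarchy analysis";
      case DeoptimizationKind::kFullFrame: return "full frame";
    }
    return "unknown";  // Unreachable for valid kinds.
  }

  // One zero-initialized atomic counter per kind; kLast + 1 keeps kFullFrame
  // (== kLast) in bounds.
  std::atomic<uint32_t> deoptimization_counts[
      static_cast<size_t>(DeoptimizationKind::kLast) + 1];

  void IncrementDeoptimizationCount(DeoptimizationKind kind) {
    deoptimization_counts[static_cast<size_t>(kind)]++;
  }

  // Mirrors the "Number of <kind> deoptimizations: <n>" dump format.
  void DumpDeoptimizations(std::ostream& os) {
    for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
      if (deoptimization_counts[i] != 0) {
        os << "Number of "
           << GetDeoptimizationKindName(static_cast<DeoptimizationKind>(i))
           << " deoptimizations: " << deoptimization_counts[i] << "\n";
      }
    }
  }

  int main() {
    IncrementDeoptimizationCount(DeoptimizationKind::kCHA);
    IncrementDeoptimizationCount(DeoptimizationKind::kCHA);
    IncrementDeoptimizationCount(DeoptimizationKind::kJitInlineCache);
    DumpDeoptimizations(std::cout);  // Prints only the non-zero kinds.
    return 0;
  }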