Merge "Use mmapped boot image class table for PIC app HLoadClass."
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d0b5192..c798d97 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -54,6 +54,7 @@
"optimizing/code_generator_utils.cc",
"optimizing/code_sinking.cc",
"optimizing/constant_folding.cc",
+ "optimizing/constructor_fence_redundancy_elimination.cc",
"optimizing/dead_code_elimination.cc",
"optimizing/escape.cc",
"optimizing/graph_checker.cc",
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 9095ecd..18a55c8 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -27,12 +27,13 @@
using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
-using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
-using helpers::XRegisterFrom;
+using helpers::OutputRegister;
+using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
+using helpers::XRegisterFrom;
#define __ GetVIXLAssembler()->
@@ -127,20 +128,51 @@
}
}
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V4S(), 0);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V2D(), 0);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
@@ -169,6 +201,46 @@
}
}
+void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Addv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMin:
+ __ Sminv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMax:
+ __ Smaxv(dst.S(), src.V4S());
+ break;
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Addp(dst.D(), src.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD min/max";
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -263,6 +335,7 @@
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
}
}
@@ -805,6 +878,77 @@
}
}
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister dst = VRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ Movi(dst.V16B(), 0);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
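For intuition, here is a minimal scalar model (standalone C++, not ART code; helper names are invented) of the reductions the ARM64 lowering above emits: ADDV/SMINV/SMAXV fold all lanes of a 128-bit vector into lane 0, and ADDP on a D register adds the two 64-bit lanes pairwise.

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <iostream>
    #include <numeric>

    // Models: addv s0, v0.4s -- sum of all four 32-bit lanes into lane 0.
    int32_t AddvS(const std::array<int32_t, 4>& v) {
      return std::accumulate(v.begin(), v.end(), int32_t{0});
    }

    // Models: sminv s0, v0.4s -- signed minimum across lanes.
    int32_t SminvS(const std::array<int32_t, 4>& v) {
      return *std::min_element(v.begin(), v.end());
    }

    // Models: addp d0, v0.2d -- pairwise add of the two 64-bit lanes.
    int64_t AddpD(const std::array<int64_t, 2>& v) {
      return v[0] + v[1];
    }

    int main() {
      std::array<int32_t, 4> v = {3, -1, 7, 2};
      std::cout << AddvS(v) << ' ' << SminvS(v) << '\n';  // prints: 11 -1
      std::array<int64_t, 2> w = {5, 9};
      std::cout << AddpD(w) << '\n';                      // prints: 14
    }

The long kMin/kMax case stays a fatal error above because AArch64 provides no across-lanes SMINV/SMAXV for 64-bit elements.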
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 527691d..7a11dff 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -73,19 +73,11 @@
}
}
-void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -112,6 +104,14 @@
}
}
+void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -621,6 +621,14 @@
}
}
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6bf28ab..c2fbf7f 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -88,19 +88,11 @@
}
}
-void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -133,6 +125,14 @@
}
}
+void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -818,6 +818,14 @@
}
}
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 75bf7a7..9d3a777 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -91,19 +91,11 @@
}
}
-void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -136,6 +128,14 @@
}
}
+void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -822,6 +822,14 @@
}
}
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index e7aec76..37190f8 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -27,9 +27,99 @@
void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimLong:
- // Long needs extra temporary to load the register pair.
+ // Long needs extra temporary to load from the register pair.
+ if (!is_zero) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(is_zero ? Location::RequiresFpuRegister()
+ : Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ __ xorps(dst, dst);
+ return;
+ }
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(dst, dst);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimLong: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(dst, tmp);
+ __ punpcklqdq(dst, dst);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(dst, dst, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ // Long needs extra temporary to store into the register pair.
locations->AddTemp(Location::RequiresFpuRegister());
FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
@@ -37,8 +127,8 @@
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
@@ -51,48 +141,34 @@
}
}
-void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ punpcklbw(reg, reg);
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
- break;
case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
- break;
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ pshufd(reg, reg, Immediate(0));
+ DCHECK_LE(4u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ movd(locations->Out().AsRegister<Register>(), src);
break;
case Primitive::kPrimLong: {
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
- __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(reg, tmp);
- __ punpcklqdq(reg, reg);
+ __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
+ __ pshufd(tmp, src, Immediate(1));
+ __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
break;
}
case Primitive::kPrimFloat:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(reg, reg, Immediate(0));
- break;
case Primitive::kPrimDouble:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(reg, reg, Immediate(0));
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -100,22 +176,6 @@
}
}
-void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (arena) LocationSummary(instruction);
@@ -137,6 +197,73 @@
}
}
+void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Long reduction or min/max require a temporary.
+ if (instruction->GetPackedType() == Primitive::kPrimLong ||
+ instruction->GetKind() == HVecReduce::kMin ||
+ instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(dst, src);
+ __ phaddd(dst, dst);
+ __ phaddd(dst, dst);
+ break;
+ case HVecReduce::kMin: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pminsd(dst, tmp);
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pminsd(dst, tmp);
+ break;
+ }
+ case HVecReduce::kMax: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pmaxsd(dst, tmp);
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pmaxsd(dst, tmp);
+ break;
+ }
+ }
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ punpckhqdq(tmp, tmp);
+ __ paddq(dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -821,6 +948,91 @@
}
}
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ // Long needs extra temporary to load from register pairs.
+ if (!is_zero) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ xorps(dst, dst);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ break;
+ case Primitive::kPrimLong: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorps(tmp, tmp);
+ __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -868,6 +1080,7 @@
case 8: scale = TIMES_8; break;
default: break;
}
+ // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -902,7 +1115,7 @@
__ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
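SSE4 offers no single cross-lane min/max reduction, so the kMin/kMax code above folds lanes with byte shifts: psrldq shifts the whole 128-bit register right, pminsd/pmaxsd take lane-wise extrema, and two shift-and-fold rounds leave the result in lane 0 for VisitVecExtractScalar to read. A standalone scalar model of the min sequence (illustrative C++, not ART code):

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <iostream>

    using Vec4 = std::array<int32_t, 4>;  // one 128-bit XMM register, 4 lanes

    // Models psrldq: shift the whole register right by 'bytes', zero-filling.
    Vec4 Psrldq(Vec4 v, int bytes) {
      Vec4 r{};
      for (int i = 0; i + bytes / 4 < 4; ++i) r[i] = v[i + bytes / 4];
      return r;
    }

    // Models pminsd: lane-wise signed minimum.
    Vec4 Pminsd(Vec4 a, Vec4 b) {
      Vec4 r;
      for (int i = 0; i < 4; ++i) r[i] = std::min(a[i], b[i]);
      return r;
    }

    int main() {
      Vec4 src = {5, 1, 7, 9};
      Vec4 dst = src;              // movaps dst, src
      Vec4 tmp = Psrldq(src, 8);   // {7, 9, 0, 0}
      dst = Pminsd(dst, tmp);      // lane0 = min(5,7), lane1 = min(1,9)
      tmp = Psrldq(dst, 4);        // bring lane 1 down to lane 0
      dst = Pminsd(dst, tmp);      // lane0 = minimum over all four lanes
      std::cout << dst[0] << '\n'; // prints: 1
    }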
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index c7ee81c..edd0209 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -27,6 +27,8 @@
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -34,11 +36,89 @@
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(is_zero ? Location::RequiresFpuRegister()
+ : Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ __ xorps(dst, dst);
+ return;
+ }
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
+ __ punpcklbw(dst, dst);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
+ __ pshufd(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
+ __ punpcklqdq(dst, dst);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ __ shufps(dst, dst, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ __ shufpd(dst, dst, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
@@ -48,44 +128,29 @@
}
}
-void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklbw(reg, reg);
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
- break;
case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
- break;
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- __ punpcklqdq(reg, reg);
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
break;
case Primitive::kPrimFloat:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(reg, reg, Immediate(0));
- break;
case Primitive::kPrimDouble:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(reg, reg, Immediate(0));
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -93,22 +158,6 @@
}
}
-void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (arena) LocationSummary(instruction);
@@ -130,6 +179,73 @@
}
}
+void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Long reduction or min/max require a temporary.
+ if (instruction->GetPackedType() == Primitive::kPrimLong ||
+ instruction->GetKind() == HVecReduce::kMin ||
+ instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(dst, src);
+ __ phaddd(dst, dst);
+ __ phaddd(dst, dst);
+ break;
+ case HVecReduce::kMin: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pminsd(dst, tmp);
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pminsd(dst, tmp);
+ break;
+ }
+ case HVecReduce::kMax: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pmaxsd(dst, tmp);
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pmaxsd(dst, tmp);
+ break;
+ }
+ }
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ punpckhqdq(tmp, tmp);
+ __ paddq(dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -814,12 +930,87 @@
}
}
-void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ xorps(dst, dst);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector memory operations.
@@ -861,6 +1052,7 @@
case 8: scale = TIMES_8; break;
default: break;
}
+ // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -895,7 +1087,7 @@
__ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
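The x86-64 replicate path uses the classic widening-broadcast idiom: movd places the scalar in lane 0, punpcklbw/punpcklwd interleave it with itself to widen a byte to 32 bits, and pshufd(0) splats that doubleword across all lanes. A standalone trace (illustrative C++, not ART code):

    #include <array>
    #include <cstdint>
    #include <iostream>

    int main() {
      uint8_t b = 0xAB;
      uint32_t dword = b;        // movd dst, reg: byte now in the low lane
      dword |= dword << 8;       // punpcklbw dst, dst: 0x00AB -> 0xABAB
      dword |= dword << 16;      // punpcklwd dst, dst: 0xABAB -> 0xABABABAB
      std::array<uint32_t, 4> v;
      v.fill(dword);             // pshufd dst, dst, 0: splat lane 0 everywhere
      for (uint32_t lane : v) std::cout << std::hex << lane << ' ';
      std::cout << '\n';         // prints: abababab abababab abababab abababab
    }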
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 6c3a9fd..b558eb1 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -64,6 +64,11 @@
// A fence with "0" inputs is dead and should've been removed in a prior pass.
DCHECK_NE(0u, ctor_fence->InputCount());
+ // TODO: this should be simplified to 'return true' since it's
+ // potentially pessimizing any code sinking for inlined constructors with final fields.
+ // TODO: double check that if the final field assignments are not moved,
+ // then the fence is not moved either.
+
return ctor_fence->GetAssociatedAllocation() != nullptr;
}
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
new file mode 100644
index 0000000..ff7ce60
--- /dev/null
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constructor_fence_redundancy_elimination.h"
+
+#include "base/arena_allocator.h"
+
+namespace art {
+
+static constexpr bool kCfreLogFenceInputCount = false;
+
+// TODO: refactor this code by reusing escape analysis.
+class CFREVisitor : public HGraphVisitor {
+ public:
+ CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ scoped_allocator_(graph->GetArena()->GetArenaPool()),
+ candidate_fences_(scoped_allocator_.Adapter(kArenaAllocCFRE)),
+ candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)),
+ stats_(stats) {}
+
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ // Visit all instructions in block.
+ HGraphVisitor::VisitBasicBlock(block);
+
+ // If there were any unmerged fences left, merge them together;
+ // the objects are considered 'published' at the end of the block.
+ MergeCandidateFences();
+ }
+
+ void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE {
+ candidate_fences_.push_back(constructor_fence);
+
+ for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) {
+ candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx));
+ }
+ }
+
+ void VisitBoundType(HBoundType* bound_type) OVERRIDE {
+ VisitAlias(bound_type);
+ }
+
+ void VisitNullCheck(HNullCheck* null_check) OVERRIDE {
+ VisitAlias(null_check);
+ }
+
+ void VisitSelect(HSelect* select) OVERRIDE {
+ VisitAlias(select);
+ }
+
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(1);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(1);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(2);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
+ // Pessimize: Merge all fences.
+ MergeCandidateFences();
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE {
+ HandleInvoke(clinit);
+ }
+
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ private:
+ void HandleInvoke(HInstruction* invoke) {
+ // An object is considered "published" if it escapes into an invoke as any of the parameters.
+ if (HasInterestingPublishTargetAsInput(invoke)) {
+ MergeCandidateFences();
+ }
+ }
+
+ // Called by any instruction visitor that may create an alias.
+ // These instructions may create an alias:
+ // - BoundType
+ // - NullCheck
+ // - Select
+ //
+ // These also create an alias, but are not handled by this function:
+ // - Phi: propagates values across blocks, but we always merge at the end of a block.
+ // - Invoke: this is handled by HandleInvoke.
+ void VisitAlias(HInstruction* aliasing_inst) {
+ // An object is considered "published" if it becomes aliased by other instructions.
+ if (HasInterestingPublishTargetAsInput(aliasing_inst)) {
+ // Note that constructing a "NullCheck" for new-instance, new-array,
+ // or a 'this' (receiver) reference is impossible.
+ //
+ // If for some reason we actually encounter such a NullCheck(FenceTarget),
+ // we LOG(WARNING).
+ if (UNLIKELY(aliasing_inst->IsNullCheck())) {
+ LOG(kIsDebugBuild ? FATAL : WARNING)
+ << "Unexpected instruction: NullCheck; should not be legal in graph";
+ // We then do a best-effort to handle this case.
+ }
+ MergeCandidateFences();
+ }
+ }
+
+ void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) {
+ // An object is considered "published" if it's stored onto the heap.
+ // Sidenote: A later "LSE" pass can still remove the fence if it proves the
+ // object doesn't actually escape.
+ if (IsInterestingPublishTarget(store_input)) {
+ // Merge all constructor fences that we've seen since
+ // the last interesting store (or since the beginning).
+ MergeCandidateFences();
+ }
+ }
+
+ bool HasInterestingPublishTargetAsInput(HInstruction* inst) {
+ for (size_t input_idx = 0; input_idx < inst->InputCount(); ++input_idx) {
+ if (IsInterestingPublishTarget(inst->InputAt(input_idx))) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ // Merges all the existing fences we've seen so far into the last-most fence.
+ //
+ // This resets the list of candidate fences and their targets back to {}.
+ void MergeCandidateFences() {
+ if (candidate_fences_.empty()) {
+ // Nothing to do, need 1+ fences to merge.
+ return;
+ }
+
+ // The merge target is always the "last" candidate fence.
+ HConstructorFence* merge_target = candidate_fences_[candidate_fences_.size() - 1];
+
+ for (HConstructorFence* fence : candidate_fences_) {
+ MaybeMerge(merge_target, fence);
+ }
+
+ if (kCfreLogFenceInputCount) {
+ LOG(INFO) << "CFRE-MergeCandidateFences: Post-merge fence input count "
+ << merge_target->InputCount();
+ }
+
+ // Each merge acts as a cut-off point. The optimization is reset completely.
+ // In theory, we could push the fence as far as its publish, but in practice
+ // there is no benefit to this extra complexity unless we also reordered
+ // the stores to come later.
+ candidate_fences_.clear();
+ candidate_fence_targets_.Clear();
+ }
+
+ // A publishing 'store' is only interesting if the value being stored
+ // is one of the fence `targets` in `candidate_fences`.
+ bool IsInterestingPublishTarget(HInstruction* store_input) const {
+ return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end();
+ }
+
+ void MaybeMerge(HConstructorFence* target, HConstructorFence* src) {
+ if (target == src) {
+ return; // Don't merge a fence into itself.
+ // This is mostly for stats purposes; we don't want to count merge(x,x)
+ // as removing a fence, because it's a no-op.
+ }
+
+ target->Merge(src);
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedCFRE);
+ }
+
+ // Phase-local heap memory allocator for CFRE optimizer. Storage obtained
+ // through this allocator is immediately released when the CFRE optimizer is done.
+ ArenaAllocator scoped_allocator_;
+
+ // Set of constructor fences that we've seen in the current block.
+ // Each constructor fence acts as a guard for one or more `targets`.
+ // There exist no stores to any `targets` between any of these fences.
+ //
+ // Fences are in succession order (e.g. fence[i] succeeds fence[i-1]
+ // within the same basic block).
+ ArenaVector<HConstructorFence*> candidate_fences_;
+
+ // Stores a set of the fence targets, to allow faster lookup of whether
+ // a detected publish is a target of one of the candidate fences.
+ ArenaHashSet<HInstruction*> candidate_fence_targets_;
+
+ // Used to record stats about the optimization.
+ OptimizingCompilerStats* const stats_;
+
+ DISALLOW_COPY_AND_ASSIGN(CFREVisitor);
+};
+
+void ConstructorFenceRedundancyElimination::Run() {
+ CFREVisitor cfre_visitor(graph_, stats_);
+
+ // Arbitrarily visit in reverse post order.
+ // The exact block visit order does not matter, as the algorithm
+ // only operates on a single block at a time.
+ cfre_visitor.VisitReversePostOrder();
+}
+
+} // namespace art
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
new file mode 100644
index 0000000..d89210c
--- /dev/null
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+/*
+ * Constructor Fence Redundancy Elimination (CFRE).
+ *
+ * A local optimization pass that merges redundant constructor fences
+ * together within the same basic block.
+ *
+ * Abbreviations:
+ * - CF: Constructor Fence
+ * - CFS: Constructor Fence Set
+ * - CFTargets: The unique set of the inputs of all the instructions in CFS.
+ *
+ * Given any CFS = { CF(x), CF(y), CF(z), ... }, define CFTargets = { x, y, z, ... }.
+ * - For any R in CFTargets, no Publish(R) may occur between any two fences in CFS.
+ * - Such a Publish(R) is called an "interesting publish".
+ *
+ * A Publish(R) is any instruction at which the reference "R" may escape
+ * (e.g. invoke, store, return, etc.) to another thread.
+ *
+ * Starting at the beginning of the block:
+ * - Find the largest contiguous CFS.
+ * - If we see an interesting publish, merge all instructions in CFS into a single CF(CFTargets).
+ * - Repeat until the block is fully visited.
+ * - At the end of the block, merge all instructions in CFS into a single CF(CFTargets).
+ */
+class ConstructorFenceRedundancyElimination : public HOptimization {
+ public:
+ ConstructorFenceRedundancyElimination(HGraph* graph,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kPassName, stats) {}
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kPassName = "constructor_fence_redundancy_elimination";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ConstructorFenceRedundancyElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
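A worked example of the pass in the notation above (hypothetical IR; object and field names are invented), showing where MergeCandidateFences fires within one basic block:

    o1 = NewInstance
    ConstructorFence(o1)          // CFS = { CF(o1) }, CFTargets = { o1 }
    o2 = NewInstance
    ConstructorFence(o2)          // CFS = { CF(o1), CF(o2) }, CFTargets = { o1, o2 }
    x  = Add(a, b)                // unrelated; candidates unchanged
    InstanceFieldSet(q, .f, o1)   // interesting publish of o1:
                                  //   merge CFS into one ConstructorFence(o1, o2)
    o3 = NewInstance
    ConstructorFence(o3)          // a fresh CFS = { CF(o3) } starts here
                                  // end of block: merge again (no-op for one fence)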
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 089340e..191d3d1 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -670,6 +670,15 @@
return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
Value(static_cast<int32_t>(value)));
}
+ } else if (instruction->IsSub()) {
+ // Incorporate suitable constants in the chased value.
+ if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+ return SubValue(Value(static_cast<int32_t>(value)),
+ GetFetch(instruction->InputAt(1), trip, in_body, !is_min));
+ } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+ return SubValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
+ Value(static_cast<int32_t>(value)));
+ }
} else if (instruction->IsArrayLength()) {
// Exploit length properties when chasing constants or chase into a new array declaration.
if (chase_hint_ == nullptr) {
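Note the `!is_min` argument in the first subtraction branch: subtracting the chased value from a constant flips which extremum to chase, since over any range of x, min(c - x) = c - max(x) and max(c - x) = c - min(x). A minimal standalone check of that identity (illustrative C++, not ART code):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      const int c = 1;
      std::vector<int> xs = {2, 5, 9};     // some range of x values
      int min_sub = c - xs[0], max_x = xs[0];
      for (int x : xs) {
        min_sub = std::min(min_sub, c - x);
        max_x = std::max(max_x, x);
      }
      assert(min_sub == c - max_x);        // min(c - x) == c - max(x)
      return 0;
    }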
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 2b82b33..9437014 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -723,6 +723,29 @@
ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr));
}
+TEST_F(InductionVarRangeTest, AddOrSubAndConstant) {
+ HInstruction* add = new (&allocator_)
+ HAdd(Primitive::kPrimInt, x_, graph_->GetIntConstant(-1));
+ HInstruction* alt = new (&allocator_)
+ HAdd(Primitive::kPrimInt, graph_->GetIntConstant(-1), x_);
+ HInstruction* sub = new (&allocator_)
+ HSub(Primitive::kPrimInt, x_, graph_->GetIntConstant(1));
+ HInstruction* rev = new (&allocator_)
+ HSub(Primitive::kPrimInt, graph_->GetIntConstant(1), x_);
+ entry_block_->AddInstruction(add);
+ entry_block_->AddInstruction(alt);
+ entry_block_->AddInstruction(sub);
+ entry_block_->AddInstruction(rev);
+ ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(add), nullptr));
+ ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(add), nullptr));
+ ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(alt), nullptr));
+ ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(alt), nullptr));
+ ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(sub), nullptr));
+ ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(sub), nullptr));
+ ExpectEqual(Value(x_, -1, 1), GetMin(CreateFetch(rev), nullptr));
+ ExpectEqual(Value(x_, -1, 1), GetMax(CreateFetch(rev), nullptr));
+}
+
//
// Tests on public methods.
//
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 027ba77..e150b65 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -285,6 +285,19 @@
return false;
}
+// Translates operation to reduction kind.
+static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
+ if (reduction->IsVecAdd() || reduction->IsVecSub()) {
+ return HVecReduce::kSum;
+ } else if (reduction->IsVecMin()) {
+ return HVecReduce::kMin;
+ } else if (reduction->IsVecMax()) {
+ return HVecReduce::kMax;
+ }
+ LOG(FATAL) << "Unsupported SIMD reduction";
+ UNREACHABLE();
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -318,8 +331,9 @@
HLoopOptimization::HLoopOptimization(HGraph* graph,
CompilerDriver* compiler_driver,
- HInductionVarAnalysis* induction_analysis)
- : HOptimization(graph, kLoopOptimizationPassName),
+ HInductionVarAnalysis* induction_analysis,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kLoopOptimizationPassName, stats),
compiler_driver_(compiler_driver),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
@@ -334,7 +348,8 @@
vector_peeling_candidate_(nullptr),
vector_runtime_test_a_(nullptr),
vector_runtime_test_b_(nullptr),
- vector_map_(nullptr) {
+ vector_map_(nullptr),
+ vector_permanent_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -388,11 +403,14 @@
ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSafeMap<HInstruction*, HInstruction*> map(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSafeMap<HInstruction*, HInstruction*> perm(
+ std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
+ vector_permanent_map_ = &perm;
// Traverse.
TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -400,6 +418,7 @@
reductions_ = nullptr;
vector_refs_ = nullptr;
vector_map_ = nullptr;
+ vector_permanent_map_ = nullptr;
}
}
@@ -603,11 +622,11 @@
// Vectorize loop, if possible and valid.
if (kEnableVectorization &&
TrySetSimpleLoopHeader(header, &main_phi) &&
- reductions_->empty() && // TODO: possible with some effort
ShouldVectorize(node, body, trip_count) &&
TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
Vectorize(node, body, exit, trip_count);
graph_->SetHasSIMD(true); // flag SIMD usage
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
return true;
}
return false;
@@ -802,6 +821,13 @@
/*unroll*/ 1);
}
+ // Link reductions to their final uses.
+ for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
+ if (i->first->IsPhi()) {
+ i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
+ }
+ }
+
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
@@ -841,21 +867,10 @@
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
+ vector_permanent_map_->clear(); // cleared once; contents are preserved across unrolling
for (uint32_t u = 0; u < unroll; u++) {
- // Clear map, leaving loop invariants setup during unrolling.
- if (u == 0) {
- vector_map_->clear();
- } else {
- for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
- if (i->second->IsVecReplicateScalar()) {
- DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
- ++i;
- } else {
- i = vector_map_->erase(i);
- }
- }
- }
// Generate instruction map.
+ vector_map_->clear();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
@@ -872,9 +887,17 @@
}
}
}
+ // Generate the induction.
vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
Insert(vector_body_, vector_index_);
}
+ // Finalize phi inputs for the reductions (if any).
+ for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
+ if (!i->first->IsPhi()) {
+ DCHECK(i->second->IsPhi());
+ GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
+ }
+ }
// Finalize phi inputs for the loop index.
phi->AddInput(lo);
phi->AddInput(vector_index_);
@@ -910,6 +933,23 @@
}
return false;
}
+ // Accept a left-hand-side reduction for
+ // (1) supported vector type,
+ // (2) vectorizable right-hand-side value.
+ auto redit = reductions_->find(instruction);
+ if (redit != reductions_->end()) {
+ Primitive::Type type = instruction->GetType();
+ if (TrySetVectorType(type, &restrictions) &&
+ VectorizeUse(node, instruction, generate_code, type, restrictions)) {
+ if (generate_code) {
+ HInstruction* new_red = vector_map_->Get(instruction);
+ vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
+ vector_permanent_map_->Overwrite(redit->second, new_red);
+ }
+ return true;
+ }
+ return false;
+ }
// Branch back okay.
if (instruction->IsGoto()) {
return true;
@@ -965,6 +1005,21 @@
}
return true;
}
+ } else if (instruction->IsPhi()) {
+ // Accept particular phi operations.
+ if (reductions_->find(instruction) != reductions_->end()) {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoReduction)) {
+ return false;
+ }
+ // Accept a reduction.
+ if (generate_code) {
+ GenerateVecReductionPhi(instruction->AsPhi());
+ }
+ return true;
+ }
+ // TODO: accept right-hand-side induction?
+ return false;
} else if (instruction->IsTypeConversion()) {
// Accept particular type conversions.
HTypeConversion* conversion = instruction->AsTypeConversion();
@@ -1155,14 +1210,14 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
default:
break;
@@ -1174,11 +1229,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1187,8 +1242,10 @@
*restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
+ *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
return false;
@@ -1200,11 +1257,12 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
+ *restrictions |=
+ kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1213,10 +1271,10 @@
*restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // -0.0 vs +0.0
+ *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // -0.0 vs +0.0
+ *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1228,23 +1286,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1256,23 +1314,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1305,9 +1363,16 @@
return;
}
// In vector code, explicit scalar expansion is needed.
- HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
- vector_map_->Put(org, Insert(vector_preheader_, vector));
+ HInstruction* vector = nullptr;
+ auto it = vector_permanent_map_->find(org);
+ if (it != vector_permanent_map_->end()) {
+ vector = it->second; // reuse during unrolling
+ } else {
+ vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
+ }
+ vector_map_->Put(org, vector);
}
}
@@ -1362,6 +1427,78 @@
vector_map_->Put(org, vector);
}
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
+ DCHECK(reductions_->find(phi) != reductions_->end());
+ DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+ HInstruction* vector = nullptr;
+ if (vector_mode_ == kSequential) {
+ HPhi* new_phi = new (global_allocator_) HPhi(
+ global_allocator_, kNoRegNumber, 0, phi->GetType());
+ vector_header_->AddPhi(new_phi);
+ vector = new_phi;
+ } else {
+ // Link vector reduction back to prior unrolled update, or a first phi.
+ auto it = vector_permanent_map_->find(phi);
+ if (it != vector_permanent_map_->end()) {
+ vector = it->second;
+ } else {
+ HPhi* new_phi = new (global_allocator_) HPhi(
+ global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
+ vector_header_->AddPhi(new_phi);
+ vector = new_phi;
+ }
+ }
+ vector_map_->Put(phi, vector);
+}
+
+void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
+ HInstruction* new_phi = vector_map_->Get(phi);
+ HInstruction* new_init = reductions_->Get(phi);
+ HInstruction* new_red = vector_map_->Get(reduction);
+ // Link unrolled vector loop back to new phi.
+ for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
+ DCHECK(new_phi->IsVecOperation());
+ }
+ // Prepare the new initialization.
+ if (vector_mode_ == kVector) {
+ // Generate a [initial, 0, .., 0] vector.
+ new_init = Insert(
+ vector_preheader_,
+ new (global_allocator_) HVecSetScalars(
+ global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
+ } else {
+ new_init = ReduceAndExtractIfNeeded(new_init);
+ }
+ // Set the phi inputs.
+ DCHECK(new_phi->IsPhi());
+ new_phi->AsPhi()->AddInput(new_init);
+ new_phi->AsPhi()->AddInput(new_red);
+ // New feed value for next phi (safe mutation in iteration).
+ reductions_->find(phi)->second = new_phi;
+}
+
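The HVecSetScalars seed built above places the scalar initial value in lane 0 and zero-fills the remaining lanes, so a final horizontal sum over all lanes yields exactly the initial value plus every loop contribution. A standalone sketch of that invariant over plain arrays (four lanes, sum reduction assumed; this models the arithmetic only, not the HIR):

    #include <cassert>
    int main() {
      const int kInitial = 7;
      int lanes[4] = {kInitial, 0, 0, 0};   // seed: [ initial, 0, 0, 0 ]
      for (int i = 0; i < 16; i += 4) {     // vectorized loop: one add per lane
        for (int l = 0; l < 4; ++l) lanes[l] += i + l;
      }
      int total = lanes[0] + lanes[1] + lanes[2] + lanes[3];  // horizontal reduce
      int expected = kInitial;
      for (int i = 0; i < 16; ++i) expected += i;             // scalar reference
      assert(total == expected);
      return 0;
    }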
+HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
+ if (instruction->IsPhi()) {
+ HInstruction* input = instruction->InputAt(1);
+ if (input->IsVecOperation()) {
+ Primitive::Type type = input->AsVecOperation()->GetPackedType();
+ HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
+ // Generate a vector reduction and scalar extract
+ // x = REDUCE( [x_1, .., x_n] )
+ // y = x_1
+ // along the exit of the defining loop.
+ HVecReduce::ReductionKind kind = GetReductionKind(input);
+ HInstruction* reduce = new (global_allocator_) HVecReduce(
+ global_allocator_, instruction, type, vector_length_, kind);
+ exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ instruction = new (global_allocator_) HVecExtractScalar(
+ global_allocator_, reduce, type, vector_length_, 0);
+ exit->InsertInstructionAfter(instruction, reduce);
+ }
+ }
+ return instruction;
+}
+
#define GENERATE_VEC(x, y) \
if (vector_mode_ == kVector) { \
vector = (x); \
@@ -1542,10 +1679,9 @@
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
- int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+ IsInt64Value(instruction->InputAt(1), 1)) {
// Test for (a + b + c) >> 1 for optional constant c.
HInstruction* a = nullptr;
HInstruction* b = nullptr;
@@ -1590,6 +1726,7 @@
vector_length_,
is_unsigned,
is_rounded));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
} else {
GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
}
@@ -1624,21 +1761,33 @@
vector_peeling_candidate_ = candidate;
}
+static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
+static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
+
uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
- // Current heuristic: unroll by 2 on ARM64/X86 for large known trip
- // counts and small loop bodies.
- // TODO: refine with operation count, remaining iterations, etc.
- // Artem had some really cool ideas for this already.
switch (compiler_driver_->GetInstructionSet()) {
- case kArm64:
- case kX86:
- case kX86_64: {
- size_t num_instructions = block->GetInstructions().CountSize();
- if (num_instructions <= 10 && trip_count >= 4 * vector_length_) {
- return 2;
+ case kArm64: {
+ DCHECK_NE(vector_length_, 0u);
+ // TODO: Unroll loops with unknown trip count.
+ if (trip_count < 2 * vector_length_) {
+ return 1;
}
- return 1;
+
+ uint32_t instruction_count = block->GetInstructions().CountSize();
+
+ // Find a beneficial unroll factor with the following restrictions:
+ // - At least one iteration of the transformed loop should be executed.
+ // - The loop body shouldn't be "too big" (heuristic).
+ uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
+ uint32_t uf2 = trip_count / vector_length_;
+ uint32_t unroll_factor =
+ TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
+ DCHECK_GE(unroll_factor, 1u);
+
+ return unroll_factor;
}
+ case kX86:
+ case kX86_64:
default:
return 1;
}
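The ARM64 case above picks the largest power of two bounded by three limits: the body-size budget (50 instructions), the iterations available (trip_count / vector_length), and the hard cap of 8. A standalone sketch of the same arithmetic (constants copied from the hunk above; TruncToPowerOfTwo open-coded here, see runtime/base/bit_utils.h for the real helper):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    uint32_t UnrollFactor(uint32_t instruction_count, int64_t trip_count, uint32_t vector_length) {
      if (trip_count < 2 * vector_length) return 1;
      uint32_t uf1 = 50u / instruction_count;                            // body-size budget
      uint32_t uf2 = static_cast<uint32_t>(trip_count / vector_length);  // iterations available
      uint32_t uf = std::min({uf1, uf2, 8u});
      uint32_t pow2 = 1;                    // TruncToPowerOfTwo(uf), open-coded
      while (pow2 * 2 <= uf) pow2 *= 2;
      return uf == 0 ? 1 : pow2;  // the compiler DCHECKs uf >= 1; clamp to keep the sketch total
    }
    int main() {
      assert(UnrollFactor(10, 100, 4) == 4);   // min(5, 25, 8) = 5, truncated to 4
      assert(UnrollFactor(4, 1000, 4) == 8);   // min(12, 250, 8) = 8
      assert(UnrollFactor(30, 100, 4) == 1);   // min(1, 25, 8) = 1
      return 0;
    }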
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 49be8a3..f347518 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -34,7 +34,8 @@
public:
HLoopOptimization(HGraph* graph,
CompilerDriver* compiler_driver,
- HInductionVarAnalysis* induction_analysis);
+ HInductionVarAnalysis* induction_analysis,
+ OptimizingCompilerStats* stats);
void Run() OVERRIDE;
@@ -62,17 +63,18 @@
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1, // no multiplication
- kNoDiv = 2, // no division
- kNoShift = 4, // no shift
- kNoShr = 8, // no arithmetic shift right
- kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
- kNoSignedHAdd = 32, // no signed halving add
- kNoUnroundedHAdd = 64, // no unrounded halving add
- kNoAbs = 128, // no absolute value
- kNoMinMax = 256, // no min/max
- kNoStringCharAt = 512, // no StringCharAt
+ kNone = 0, // no restrictions
+ kNoMul = 1 << 0, // no multiplication
+ kNoDiv = 1 << 1, // no division
+ kNoShift = 1 << 2, // no shift
+ kNoShr = 1 << 3, // no arithmetic shift right
+ kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 1 << 5, // no signed halving add
+ kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
+ kNoAbs = 1 << 7, // no absolute value
+ kNoMinMax = 1 << 8, // no min/max
+ kNoStringCharAt = 1 << 9, // no StringCharAt
+ kNoReduction = 1 << 10, // no reduction
};
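Each restriction is one bit, so a back end describes itself with a single OR-ed word and HasVectorRestrictions answers membership with one AND, for example:

    #include <cassert>
    #include <cstdint>
    int main() {
      constexpr uint64_t kNoDiv = 1 << 1, kNoMinMax = 1 << 8, kNoReduction = 1 << 10;
      uint64_t restrictions = kNoDiv | kNoReduction;  // e.g. a type with no reductions yet
      assert((restrictions & kNoReduction) != 0);     // reductions are forbidden
      assert((restrictions & kNoMinMax) == 0);        // min/max remains allowed
      return 0;
    }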
/*
@@ -155,6 +157,9 @@
HInstruction* opb,
HInstruction* offset,
Primitive::Type type);
+ void GenerateVecReductionPhi(HPhi* phi);
+ void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
+ HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
void GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -253,6 +258,10 @@
// Contents reside in phase-local heap memory.
ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+ // Permanent mapping used during vectorization synthesis.
+ // Contents reside in phase-local heap memory.
+ ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index b5b03d8..1c5603d 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@
allocator_(&pool_),
graph_(CreateGraph(&allocator_)),
iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
- loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
+ loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_, nullptr)) {
BuildGraph();
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2a7017c..9cff6b0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1289,18 +1289,59 @@
return remove_count;
}
-HInstruction* HConstructorFence::GetAssociatedAllocation() {
+void HConstructorFence::Merge(HConstructorFence* other) {
+ // Do not delete yourself from the graph.
+ DCHECK(this != other);
+ // Don't try to merge with an instruction not associated with a block.
+ DCHECK(other->GetBlock() != nullptr);
+ // A constructor fence's return type is "kPrimVoid"
+ // and therefore it cannot have any environment uses.
+ DCHECK(!other->HasEnvironmentUses());
+
+ auto has_input = [](HInstruction* haystack, HInstruction* needle) {
+ // Check if `haystack` has `needle` as any of its inputs.
+ for (size_t input_count = 0; input_count < haystack->InputCount(); ++input_count) {
+ if (haystack->InputAt(input_count) == needle) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Add any inputs from `other` into `this` if it wasn't already an input.
+ for (size_t input_count = 0; input_count < other->InputCount(); ++input_count) {
+ HInstruction* other_input = other->InputAt(input_count);
+ if (!has_input(this, other_input)) {
+ AddInput(other_input);
+ }
+ }
+
+ other->GetBlock()->RemoveInstruction(other);
+}
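Merge() is deliberately quadratic: has_input rescans `this` for every input of `other`. Fence input lists are expected to stay tiny, so a hash set would cost more than it saves. A container-level sketch of the same dedup-then-absorb shape, with plain vectors standing in for HIR use lists (names hypothetical):

    #include <algorithm>
    #include <cassert>
    #include <vector>
    // Absorb rhs's inputs into lhs, keeping inputs unique, then drop rhs.
    void MergeInputs(std::vector<int>* lhs, std::vector<int>* rhs) {
      for (int input : *rhs) {
        if (std::find(lhs->begin(), lhs->end(), input) == lhs->end()) {
          lhs->push_back(input);
        }
      }
      rhs->clear();  // models RemoveInstruction(other)
    }
    int main() {
      std::vector<int> a = {1, 2};
      std::vector<int> b = {2, 3};
      MergeInputs(&a, &b);
      assert((a == std::vector<int>{1, 2, 3}));
      assert(b.empty());
      return 0;
    }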
+
+HInstruction* HConstructorFence::GetAssociatedAllocation(bool ignore_inputs) {
HInstruction* new_instance_inst = GetPrevious();
// Check if the immediately preceding instruction is a new-instance/new-array.
// Otherwise this fence is for protecting final fields.
if (new_instance_inst != nullptr &&
(new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) {
- // TODO: Need to update this code to handle multiple inputs.
- DCHECK_EQ(InputCount(), 1u);
- return new_instance_inst;
- } else {
- return nullptr;
+ if (ignore_inputs) {
+ // If inputs are ignored, simply check if the predecessor is
+ // *any* HNewInstance/HNewArray.
+ //
+ // Inputs are normally only ignored for prepare_for_register_allocation,
+ // at which point *any* prior HNewInstance/Array can be considered
+ // associated.
+ return new_instance_inst;
+ } else {
+ // Normal case: There must be exactly 1 input and the previous instruction
+ // must be that input.
+ if (InputCount() == 1u && InputAt(0) == new_instance_inst) {
+ return new_instance_inst;
+ }
+ }
}
+ return nullptr;
}
#define DEFINE_ACCEPT(name, super) \
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b551f37..a6d0da1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1374,7 +1374,8 @@
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
M(VecReplicateScalar, VecUnaryOperation) \
- M(VecSumReduce, VecUnaryOperation) \
+ M(VecExtractScalar, VecUnaryOperation) \
+ M(VecReduce, VecUnaryOperation) \
M(VecCnv, VecUnaryOperation) \
M(VecNeg, VecUnaryOperation) \
M(VecAbs, VecUnaryOperation) \
@@ -6648,13 +6649,24 @@
// Returns how many HConstructorFence instructions were removed from graph.
static size_t RemoveConstructorFences(HInstruction* instruction);
+ // Combine all inputs of `this` and the `other` instruction, and remove
+ // `other` from the graph.
+ //
+ // Inputs are unique after the merge.
+ //
+ // Requirement: `this` must not be the same as `other`.
+ void Merge(HConstructorFence* other);
+
// Check if this constructor fence is protecting
// an HNewInstance or HNewArray that is also the immediate
// predecessor of `this`.
//
+ // If `ignore_inputs` is true, then the immediate predecessor doesn't need
+ // to be one of the inputs of `this`.
+ //
// Returns the associated HNewArray or HNewInstance,
// or null otherwise.
- HInstruction* GetAssociatedAllocation();
+ HInstruction* GetAssociatedAllocation(bool ignore_inputs = false);
DECLARE_INSTRUCTION(ConstructorFence);
@@ -7042,6 +7054,17 @@
return false;
}
+// Returns true iff instruction is the given integral constant.
+inline bool IsInt64Value(HInstruction* instruction, int64_t value) {
+ int64_t val = 0;
+ return IsInt64AndGet(instruction, &val) && val == value;
+}
+
+// Returns true iff instruction is a zero bit pattern.
+inline bool IsZeroBitPattern(HInstruction* instruction) {
+ return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
+}
+
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 6261171..886d75e 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -63,6 +63,10 @@
// GetVectorLength() x GetPackedType() operations simultaneously.
class HVecOperation : public HVariableInputSizeInstruction {
public:
+ // A SIMD operation looks like an FPU location.
+ // TODO: we could introduce SIMD types in HIR.
+ static constexpr Primitive::Type kSIMDType = Primitive::kPrimDouble;
+
HVecOperation(ArenaAllocator* arena,
Primitive::Type packed_type,
SideEffects side_effects,
@@ -89,10 +93,9 @@
return vector_length_ * Primitive::ComponentSize(GetPackedType());
}
- // Returns the type of the vector operation: a SIMD operation looks like a FPU location.
- // TODO: we could introduce SIMD types in HIR.
+ // Returns the type of the vector operation.
Primitive::Type GetType() const OVERRIDE {
- return Primitive::kPrimDouble;
+ return kSIMDType;
}
// Returns the true component type packed in a vector.
@@ -220,8 +223,11 @@
DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
};
-// Packed type consistency checker (same vector length integral types may mix freely).
+// Packed type consistency checker ("same vector length" integral types may mix freely).
inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) {
+ if (input->IsPhi()) {
+ return input->GetType() == HVecOperation::kSIMDType; // carries SIMD
+ }
DCHECK(input->IsVecOperation());
Primitive::Type input_type = input->AsVecOperation()->GetPackedType();
switch (input_type) {
@@ -265,27 +271,77 @@
DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
};
-// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
-// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
-class HVecSumReduce FINAL : public HVecUnaryOperation {
- HVecSumReduce(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- uint32_t dex_pc = kNoDexPc)
+// Extracts a particular scalar from the given vector,
+// viz. extract[ x1, .. , xn ] = x_i.
+//
+// TODO: for now only i == 1 case supported.
+class HVecExtractScalar FINAL : public HVecUnaryOperation {
+ public:
+ HVecExtractScalar(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ size_t index,
+ uint32_t dex_pc = kNoDexPc)
: HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
+ DCHECK_LT(index, vector_length);
+ DCHECK_EQ(index, 0u);
+ }
+
+ // Yields a single component in the vector.
+ Primitive::Type GetType() const OVERRIDE {
+ return GetPackedType();
+ }
+
+ // An extract needs to stay in place, since SIMD registers are not
+ // kept alive across vector loop boundaries (yet).
+ bool CanBeMoved() const OVERRIDE { return false; }
+
+ DECLARE_INSTRUCTION(VecExtractScalar);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecExtractScalar);
+};
+
+// Reduces the given vector into the first element as sum/min/max,
+// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi
+// and the "-" denotes "don't care" (implementation dependent).
+class HVecReduce FINAL : public HVecUnaryOperation {
+ public:
+ enum ReductionKind {
+ kSum = 1,
+ kMin = 2,
+ kMax = 3
+ };
+
+ HVecReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ ReductionKind kind,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc),
+ kind_(kind) {
+ DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- // TODO: probably integral promotion
- Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+ ReductionKind GetKind() const { return kind_; }
bool CanBeMoved() const OVERRIDE { return true; }
- DECLARE_INSTRUCTION(VecSumReduce);
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ DCHECK(other->IsVecReduce());
+ const HVecReduce* o = other->AsVecReduce();
+ return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind();
+ }
+
+ DECLARE_INSTRUCTION(VecReduce);
private:
- DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+ const ReductionKind kind_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecReduce);
};
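Together these two nodes replace the old all-in-one HVecSumReduce: HVecReduce folds every lane into lane 0 (the other lanes become don't-care), and HVecExtractScalar then reads lane 0 out as a scalar. A lane-level model of the three kinds over plain arrays (the documented semantics, not the codegen):

    #include <algorithm>
    #include <cassert>
    enum ReductionKind { kSum, kMin, kMax };
    // reduce[ x1, .., xn ] = [ y, ---- ]; extract then yields y from lane 0.
    int ReduceAndExtract(const int (&lanes)[4], ReductionKind kind) {
      int y = lanes[0];
      for (int l = 1; l < 4; ++l) {
        switch (kind) {
          case kSum: y += lanes[l]; break;
          case kMin: y = std::min(y, lanes[l]); break;
          case kMax: y = std::max(y, lanes[l]); break;
        }
      }
      return y;
    }
    int main() {
      int v[4] = {5, -2, 9, 0};
      assert(ReduceAndExtract(v, kSum) == 12);
      assert(ReduceAndExtract(v, kMin) == -2);
      assert(ReduceAndExtract(v, kMax) == 9);
      return 0;
    }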
// Converts every component in the vector,
@@ -754,20 +810,23 @@
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
+// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
class HVecSetScalars FINAL : public HVecOperation {
+ public:
HVecSetScalars(ArenaAllocator* arena,
HInstruction** scalars, // array
Primitive::Type packed_type,
size_t vector_length,
+ size_t number_of_scalars,
uint32_t dex_pc = kNoDexPc)
: HVecOperation(arena,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ vector_length,
+ number_of_scalars,
vector_length,
dex_pc) {
- for (size_t i = 0; i < vector_length; i++) {
+ for (size_t i = 0; i < number_of_scalars; i++) {
DCHECK(!scalars[i]->IsVecOperation());
SetRawInputAt(i, scalars[i]);
}
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index 0238ea4..5a56a2c 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -332,4 +332,32 @@
EXPECT_FALSE(v1->Equals(v3)); // different vector lengths
}
+TEST_F(NodesVectorTest, VectorKindMattersOnReduce) {
+ HVecOperation* v0 = new (&allocator_)
+ HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4);
+
+ HVecReduce* v1 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kSum);
+ HVecReduce* v2 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMin);
+ HVecReduce* v3 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMax);
+
+ EXPECT_FALSE(v0->CanBeMoved());
+ EXPECT_TRUE(v1->CanBeMoved());
+ EXPECT_TRUE(v2->CanBeMoved());
+ EXPECT_TRUE(v3->CanBeMoved());
+
+ EXPECT_EQ(HVecReduce::kSum, v1->GetKind());
+ EXPECT_EQ(HVecReduce::kMin, v2->GetKind());
+ EXPECT_EQ(HVecReduce::kMax, v3->GetKind());
+
+ EXPECT_TRUE(v1->Equals(v1));
+ EXPECT_TRUE(v2->Equals(v2));
+ EXPECT_TRUE(v3->Equals(v3));
+
+ EXPECT_FALSE(v1->Equals(v2)); // different kinds
+ EXPECT_FALSE(v1->Equals(v3));
+}
+
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e98c97c..399cd98 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -53,6 +53,7 @@
#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
+#include "constructor_fence_redundancy_elimination.h"
#include "dead_code_elimination.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
@@ -509,11 +510,13 @@
} else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
- return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
+ return new (arena) HLoopOptimization(graph, driver, most_recent_induction, stats);
} else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
return new (arena) CHAGuardOptimization(graph);
} else if (opt_name == CodeSinking::kCodeSinkingPassName) {
return new (arena) CodeSinking(graph, stats);
+ } else if (opt_name == ConstructorFenceRedundancyElimination::kPassName) {
+ return new (arena) ConstructorFenceRedundancyElimination(graph, stats);
#ifdef ART_ENABLE_CODEGEN_arm
} else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
return new (arena) arm::InstructionSimplifierArm(graph, stats);
@@ -770,7 +773,7 @@
LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
- HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+ HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction, stats);
LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
HSharpening* sharpening = new (arena) HSharpening(
@@ -784,6 +787,8 @@
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
+ ConstructorFenceRedundancyElimination* cfre =
+ new (arena) ConstructorFenceRedundancyElimination(graph, stats);
HOptimization* optimizations1[] = {
intrinsics,
@@ -821,6 +826,8 @@
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
simplify4,
+ cfre, // Eliminate constructor fences after code sinking so that the
+ // sinking pass needs no complicated logic to split a fence with many inputs.
};
RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d6da73c..07f9635 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -63,6 +63,8 @@
kBooleanSimplified,
kIntrinsicRecognized,
kLoopInvariantMoved,
+ kLoopVectorized,
+ kLoopVectorizedIdiom,
kSelectGenerated,
kRemovedInstanceOf,
kInlinedInvokeVirtualOrInterface,
@@ -91,6 +93,7 @@
kConstructorFenceGeneratedFinal,
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
+ kConstructorFenceRemovedCFRE,
kLastStat
};
@@ -183,6 +186,8 @@
case kBooleanSimplified : name = "BooleanSimplified"; break;
case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
+ case kLoopVectorized : name = "LoopVectorized"; break;
+ case kLoopVectorizedIdiom : name = "LoopVectorizedIdiom"; break;
case kSelectGenerated : name = "SelectGenerated"; break;
case kRemovedInstanceOf: name = "RemovedInstanceOf"; break;
case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
@@ -211,6 +216,7 @@
case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
+ case kConstructorFenceRemovedCFRE: name = "ConstructorFenceRemovedCFRE"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 510619f..1d9d28a 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -215,12 +215,12 @@
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) {
+ HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
+ HandleSimpleArithmeticSIMD(instr);
}
void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
@@ -283,8 +283,8 @@
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
@@ -307,6 +307,10 @@
HandleSimpleArithmeticSIMD(instr);
}
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 63d5b7d..e1a80ec 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -83,8 +83,8 @@
M(SuspendCheck , unused) \
M(TypeConversion , unused) \
M(VecReplicateScalar , unused) \
- M(VecSetScalars , unused) \
- M(VecSumReduce , unused) \
+ M(VecExtractScalar , unused) \
+ M(VecReduce , unused) \
M(VecCnv , unused) \
M(VecNeg , unused) \
M(VecAbs , unused) \
@@ -103,6 +103,7 @@
M(VecShl , unused) \
M(VecShr , unused) \
M(VecUShr , unused) \
+ M(VecSetScalars , unused) \
M(VecMultiplyAccumulate, unused) \
M(VecLoad , unused) \
M(VecStore , unused)
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 4c8fb68..dd6dcd1 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -86,7 +86,7 @@
int32_t y = High32Bits(value);
if (x == y) {
- return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0 && IsInt<16>(value >> 16))) ? 2 : 3;
+ return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 2 : 3;
}
return INT_MAX;
diff --git a/openjdkjvmti/OpenjdkJvmTi.cc b/openjdkjvmti/OpenjdkJvmTi.cc
index 6c0d492..277f611 100644
--- a/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/openjdkjvmti/OpenjdkJvmTi.cc
@@ -810,11 +810,11 @@
}
static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env,
- jobject object ATTRIBUTE_UNUSED,
- jvmtiMonitorUsage* info_ptr ATTRIBUTE_UNUSED) {
+ jobject object,
+ jvmtiMonitorUsage* info_ptr) {
ENSURE_VALID_ENV(env);
ENSURE_HAS_CAP(env, can_get_monitor_info);
- return ERR(NOT_IMPLEMENTED);
+ return ObjectUtil::GetObjectMonitorUsage(env, object, info_ptr);
}
static jvmtiError GetFieldName(jvmtiEnv* env,
diff --git a/openjdkjvmti/art_jvmti.h b/openjdkjvmti/art_jvmti.h
index 93eee28..d3f52f6 100644
--- a/openjdkjvmti/art_jvmti.h
+++ b/openjdkjvmti/art_jvmti.h
@@ -226,7 +226,7 @@
.can_get_synthetic_attribute = 1,
.can_get_owned_monitor_info = 1,
.can_get_current_contended_monitor = 0,
- .can_get_monitor_info = 0,
+ .can_get_monitor_info = 1,
.can_pop_frame = 0,
.can_redefine_classes = 1,
.can_signal_thread = 0,
diff --git a/openjdkjvmti/ti_object.cc b/openjdkjvmti/ti_object.cc
index 2506aca..89ce352 100644
--- a/openjdkjvmti/ti_object.cc
+++ b/openjdkjvmti/ti_object.cc
@@ -35,6 +35,8 @@
#include "mirror/object-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
+#include "thread_list.h"
+#include "ti_thread.h"
namespace openjdkjvmti {
@@ -73,4 +75,59 @@
return ERR(NONE);
}
+jvmtiError ObjectUtil::GetObjectMonitorUsage(
+ jvmtiEnv* baseenv, jobject obj, jvmtiMonitorUsage* usage) {
+ ArtJvmTiEnv* env = ArtJvmTiEnv::AsArtJvmTiEnv(baseenv);
+ if (obj == nullptr) {
+ return ERR(INVALID_OBJECT);
+ }
+ if (usage == nullptr) {
+ return ERR(NULL_POINTER);
+ }
+ art::Thread* self = art::Thread::Current();
+ ThreadUtil::SuspendCheck(self);
+ art::JNIEnvExt* jni = self->GetJniEnv();
+ std::vector<jthread> wait;
+ std::vector<jthread> notify_wait;
+ {
+ art::ScopedObjectAccess soa(self); // Now we know we have the shared lock.
+ art::ScopedThreadSuspension sts(self, art::kNative);
+ art::ScopedSuspendAll ssa("GetObjectMonitorUsage", /*long_suspend*/false);
+ art::ObjPtr<art::mirror::Object> target(self->DecodeJObject(obj));
+ // This gets the list of threads trying to lock or wait on the monitor.
+ art::MonitorInfo info(target.Ptr());
+ usage->owner = info.owner_ != nullptr ?
+ jni->AddLocalReference<jthread>(info.owner_->GetPeerFromOtherThread()) : nullptr;
+ usage->entry_count = info.entry_count_;
+ for (art::Thread* thd : info.waiters_) {
+ // The RI seems to count threads waiting on a notify among those waiting to acquire the
+ // monitor. We match this behavior.
+ notify_wait.push_back(jni->AddLocalReference<jthread>(thd->GetPeerFromOtherThread()));
+ wait.push_back(jni->AddLocalReference<jthread>(thd->GetPeerFromOtherThread()));
+ }
+ {
+ // Scan all threads to see which are waiting on this particular monitor.
+ art::MutexLock tll(self, *art::Locks::thread_list_lock_);
+ for (art::Thread* thd : art::Runtime::Current()->GetThreadList()->GetList()) {
+ if (thd != info.owner_ && target.Ptr() == thd->GetMonitorEnterObject()) {
+ wait.push_back(jni->AddLocalReference<jthread>(thd->GetPeerFromOtherThread()));
+ }
+ }
+ }
+ }
+ usage->waiter_count = wait.size();
+ usage->notify_waiter_count = notify_wait.size();
+ jvmtiError ret = CopyDataIntoJvmtiBuffer(env,
+ reinterpret_cast<const unsigned char*>(wait.data()),
+ wait.size() * sizeof(jthread),
+ reinterpret_cast<unsigned char**>(&usage->waiters));
+ if (ret != OK) {
+ return ret;
+ }
+ return CopyDataIntoJvmtiBuffer(env,
+ reinterpret_cast<const unsigned char*>(notify_wait.data()),
+ notify_wait.size() * sizeof(jthread),
+ reinterpret_cast<unsigned char**>(&usage->notify_waiters));
+}
+
} // namespace openjdkjvmti
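With can_get_monitor_info now advertised, agents can reach this code through the standard JVM TI entry point. A minimal sketch of the caller side (plain jvmti.h API; error handling trimmed, and the two returned arrays must be freed with Deallocate):

    #include <jvmti.h>
    // Assumes `jvmti` came from JavaVM::GetEnv and can_get_monitor_info
    // was requested via AddCapabilities before this call.
    void DumpMonitorUsage(jvmtiEnv* jvmti, jobject monitor) {
      jvmtiMonitorUsage usage = {};
      if (jvmti->GetObjectMonitorUsage(monitor, &usage) != JVMTI_ERROR_NONE) {
        return;
      }
      // usage.owner / usage.entry_count describe the current holder; per the
      // implementation above, usage.waiters also includes the notify-waiters.
      jvmti->Deallocate(reinterpret_cast<unsigned char*>(usage.waiters));
      jvmti->Deallocate(reinterpret_cast<unsigned char*>(usage.notify_waiters));
    }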
diff --git a/openjdkjvmti/ti_object.h b/openjdkjvmti/ti_object.h
index fa3bd0f..977ec39 100644
--- a/openjdkjvmti/ti_object.h
+++ b/openjdkjvmti/ti_object.h
@@ -42,6 +42,8 @@
static jvmtiError GetObjectSize(jvmtiEnv* env, jobject object, jlong* size_ptr);
static jvmtiError GetObjectHashCode(jvmtiEnv* env, jobject object, jint* hash_code_ptr);
+
+ static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env, jobject object, jvmtiMonitorUsage* usage);
};
} // namespace openjdkjvmti
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 69c615d..ab9ca84 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1843,55 +1843,53 @@
mov r12, r0 @ r12 holds reference to code
ldr r0, [sp, #4] @ restore r0
RESTORE_SAVE_REFS_AND_ARGS_FRAME
+ adr lr, art_quick_instrumentation_exit + /* thumb mode */ 1
+ @ load art_quick_instrumentation_exit into lr in thumb mode
REFRESH_MARKING_REGISTER
- blx r12 @ call method with lr set to art_quick_instrumentation_exit
-@ Deliberate fall-through into art_quick_instrumentation_exit.
- .type art_quick_instrumentation_exit, #function
- .global art_quick_instrumentation_exit
-art_quick_instrumentation_exit:
- mov lr, #0 @ link register is to here, so clobber with 0 for later checks
- SETUP_SAVE_REFS_ONLY_FRAME r2 @ set up frame knowing r2 and r3 must be dead on exit
- mov r12, sp @ remember bottom of caller's frame
- push {r0-r1} @ save return value
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r1, 4
- mov r2, sp @ store gpr_res pointer.
- vpush {d0} @ save fp return value
- .cfi_adjust_cfa_offset 8
- mov r3, sp @ store fpr_res pointer
- mov r1, r12 @ pass SP
- mov r0, r9 @ pass Thread::Current
- blx artInstrumentationMethodExitFromCode @ (Thread*, SP, gpr_res*, fpr_res*)
-
- mov r2, r0 @ link register saved by instrumentation
- mov lr, r1 @ r1 is holding link register if we're to bounce to deoptimize
- vpop {d0} @ restore fp return value
- .cfi_adjust_cfa_offset -8
- pop {r0, r1} @ restore return value
- .cfi_adjust_cfa_offset -8
- .cfi_restore r0
- .cfi_restore r1
- RESTORE_SAVE_REFS_ONLY_FRAME
- REFRESH_MARKING_REGISTER
- cbz r2, .Ldo_deliver_instrumentation_exception
- @ Deliver exception if we got nullptr as function.
- bx r2 @ Otherwise, return
+ bx r12 @ call method with lr set to art_quick_instrumentation_exit
.Ldeliver_instrumentation_entry_exception:
@ Deliver exception for art_quick_instrumentation_entry placed after
@ art_quick_instrumentation_exit so that the fallthrough works.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
-.Ldo_deliver_instrumentation_exception:
DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry
+ENTRY art_quick_instrumentation_exit
+ mov lr, #0 @ link register is to here, so clobber with 0 for later checks
+ SETUP_SAVE_EVERYTHING_FRAME r2
+
+ add r3, sp, #8 @ store fpr_res pointer, in kSaveEverything frame
+ add r2, sp, #136 @ store gpr_res pointer, in kSaveEverything frame
+ mov r1, sp @ pass SP
+ mov r0, r9 @ pass Thread::Current
+ blx artInstrumentationMethodExitFromCode @ (Thread*, SP, gpr_res*, fpr_res*)
+
+ cbz r0, .Ldo_deliver_instrumentation_exception
+ @ Deliver exception if we got nullptr as function.
+ cbnz r1, .Ldeoptimize
+ // Normal return.
+ str r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
+ @ Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ REFRESH_MARKING_REGISTER
+ bx lr
+.Ldo_deliver_instrumentation_exception:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+.Ldeoptimize:
+ str r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
+ @ Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ // Jump to art_quick_deoptimize.
+ b art_quick_deoptimize
+END art_quick_instrumentation_exit
+
/*
* Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
* will long jump to the upcall with a special exception of -1.
*/
.extern artDeoptimize
ENTRY art_quick_deoptimize
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
+ SETUP_SAVE_EVERYTHING_FRAME r0
mov r0, r9 @ pass Thread::Current
blx artDeoptimize @ (Thread*)
END art_quick_deoptimize
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 802cf5e..adfc88f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2365,32 +2365,31 @@
.extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_exit
mov xLR, #0 // Clobber LR for later checks.
- SETUP_SAVE_REFS_ONLY_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
-
- str x0, [sp, #-16]! // Save integer result.
- .cfi_adjust_cfa_offset 16
- str d0, [sp, #8] // Save floating-point result.
-
- add x3, sp, #8 // Pass floating-point result pointer.
- mov x2, sp // Pass integer result pointer.
- add x1, sp, #16 // Pass SP.
+ add x3, sp, #8 // Pass floating-point result pointer, in kSaveEverything frame.
+ add x2, sp, #264 // Pass integer result pointer, in kSaveEverything frame.
+ mov x1, sp // Pass SP.
mov x0, xSELF // Pass Thread.
bl artInstrumentationMethodExitFromCode // (Thread*, SP, gpr_res*, fpr_res*)
- mov xIP0, x0 // Return address from instrumentation call.
- mov xLR, x1 // r1 is holding link register if we're to bounce to deoptimize
-
- ldr d0, [sp, #8] // Restore floating-point result.
- ldr x0, [sp], #16 // Restore integer result, and drop stack area.
- .cfi_adjust_cfa_offset -16
-
- RESTORE_SAVE_REFS_ONLY_FRAME
+ cbz x0, .Ldo_deliver_instrumentation_exception
+ // Handle error
+ cbnz x1, .Ldeoptimize
+ // Normal return.
+ str x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
+ // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
REFRESH_MARKING_REGISTER
- cbz xIP0, 1f // Handle error
- br xIP0 // Tail-call out.
-1:
- DELIVER_PENDING_EXCEPTION
+ br lr
+.Ldo_deliver_instrumentation_exception:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+.Ldeoptimize:
+ str x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
+ // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ // Jump to art_quick_deoptimize.
+ b art_quick_deoptimize
END art_quick_instrumentation_exit
/*
@@ -2399,7 +2398,7 @@
*/
.extern artDeoptimize
ENTRY art_quick_deoptimize
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
mov x0, xSELF // Pass thread.
bl artDeoptimize // (Thread*)
brk 0
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index f08c7fe..eecca58 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2063,42 +2063,43 @@
DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
pushl LITERAL(0) // Push a fake return PC as there will be none on the stack.
CFI_ADJUST_CFA_OFFSET(4)
- SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
- mov %esp, %ecx // Remember SP
- subl LITERAL(8), %esp // Save float return value.
+ SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
+
+ movl %esp, %ecx // Remember SP
+ subl LITERAL(8), %esp // Align stack.
CFI_ADJUST_CFA_OFFSET(8)
- movq %xmm0, (%esp)
- PUSH edx // Save gpr return value.
+ PUSH edx // Save gpr return value. edx and eax need to be together,
+ // which isn't the case in kSaveEverything frame.
PUSH eax
- leal 8(%esp), %eax // Get pointer to fpr_result
+ leal 32(%esp), %eax // Get pointer to fpr_result, in kSaveEverything frame
movl %esp, %edx // Get pointer to gpr_result
PUSH eax // Pass fpr_result
PUSH edx // Pass gpr_result
- PUSH ecx // Pass SP.
+ PUSH ecx // Pass SP
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current.
CFI_ADJUST_CFA_OFFSET(4)
+
call SYMBOL(artInstrumentationMethodExitFromCode) // (Thread*, SP, gpr_result*, fpr_result*)
- testl %eax, %eax // Check if we returned error.
- jz 1f
- mov %eax, %ecx // Move returned link register.
- addl LITERAL(16), %esp // Pop arguments.
- CFI_ADJUST_CFA_OFFSET(-16)
- movl %edx, %ebx // Move returned link register for deopt
- // (ebx is pretending to be our LR).
- POP eax // Restore gpr return value.
- POP edx
- movq (%esp), %xmm0 // Restore fpr return value.
- addl LITERAL(8), %esp
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_SAVE_REFS_ONLY_FRAME
- addl LITERAL(4), %esp // Remove fake return pc.
- CFI_ADJUST_CFA_OFFSET(-4)
- jmp *%ecx // Return.
-1:
- addl LITERAL(32), %esp
+ // Return result could have been changed if it's a reference.
+ movl 16(%esp), %ecx
+ movl %ecx, (80+32)(%esp)
+ addl LITERAL(32), %esp // Pop arguments and gpr_result.
CFI_ADJUST_CFA_OFFSET(-32)
- RESTORE_SAVE_REFS_ONLY_FRAME
- DELIVER_PENDING_EXCEPTION
+
+ testl %eax, %eax // Check if we returned error.
+ jz .Ldo_deliver_instrumentation_exception
+ testl %edx, %edx
+ jnz .Ldeoptimize
+ // Normal return.
+ movl %eax, FRAME_SIZE_SAVE_EVERYTHING-4(%esp) // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ ret
+.Ldeoptimize:
+ mov %edx, (FRAME_SIZE_SAVE_EVERYTHING-4)(%esp) // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ jmp SYMBOL(art_quick_deoptimize)
+.Ldo_deliver_instrumentation_exception:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit
/*
@@ -2106,8 +2107,7 @@
* will long jump to the upcall with a special exception of -1.
*/
DEFINE_FUNCTION art_quick_deoptimize
- PUSH ebx // Entry point for a jump. Fake that we were called.
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
+ SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
subl LITERAL(12), %esp // Align stack.
CFI_ADJUST_CFA_OFFSET(12)
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index b70abaa..2c3da90 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2026,45 +2026,31 @@
pushq LITERAL(0) // Push a fake return PC as there will be none on the stack.
CFI_ADJUST_CFA_OFFSET(8)
- SETUP_SAVE_REFS_ONLY_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
- // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
- // we would need to fully restore it. As there are a good number of callee-save registers, it
- // seems easier to have an extra small stack area. But this should be revisited.
-
- movq %rsp, %rsi // Pass SP.
-
- PUSH rax // Save integer result.
- movq %rsp, %rdx // Pass integer result pointer.
-
- subq LITERAL(8), %rsp // Save floating-point result.
- CFI_ADJUST_CFA_OFFSET(8)
- movq %xmm0, (%rsp)
- movq %rsp, %rcx // Pass floating-point result pointer.
-
- movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
+ leaq 16(%rsp), %rcx // Pass floating-point result pointer, in kSaveEverything frame.
+ leaq 144(%rsp), %rdx // Pass integer result pointer, in kSaveEverything frame.
+ movq %rsp, %rsi // Pass SP.
+ movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
call SYMBOL(artInstrumentationMethodExitFromCode) // (Thread*, SP, gpr_res*, fpr_res*)
- movq %rax, %rdi // Store return PC
- movq %rdx, %rsi // Store second return PC in hidden arg.
-
- movq (%rsp), %xmm0 // Restore floating-point result.
- addq LITERAL(8), %rsp
- CFI_ADJUST_CFA_OFFSET(-8)
- POP rax // Restore integer result.
-
- RESTORE_SAVE_REFS_ONLY_FRAME
-
- testq %rdi, %rdi // Check if we have a return-pc to go to. If we don't then there was
+ testq %rax, %rax // Check if we have a return-pc to go to. If we don't then there was
// an exception
- jz 1f
-
- addq LITERAL(8), %rsp // Drop fake return pc.
-
- jmp *%rdi // Return.
-1:
- DELIVER_PENDING_EXCEPTION
+ jz .Ldo_deliver_instrumentation_exception
+ testq %rdx, %rdx
+ jnz .Ldeoptimize
+ // Normal return.
+ movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp) // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ ret
+.Ldeoptimize:
+ movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp) // Set return pc.
+ RESTORE_SAVE_EVERYTHING_FRAME
+ // Jump to art_quick_deoptimize.
+ jmp SYMBOL(art_quick_deoptimize)
+.Ldo_deliver_instrumentation_exception:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit
/*
@@ -2072,10 +2058,7 @@
* will long jump to the upcall with a special exception of -1.
*/
DEFINE_FUNCTION art_quick_deoptimize
- pushq %rsi // Entry point for a jump. Fake that we were called.
- // Use hidden arg.
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
- // Stack should be aligned now.
+ SETUP_SAVE_EVERYTHING_FRAME // Stack should be aligned now.
movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
call SYMBOL(artDeoptimize) // (Thread*)
UNREACHABLE
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 148ef86..8738adf 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -73,6 +73,7 @@
"BCE ",
"DCE ",
"LSE ",
+ "CFRE ",
"LICM ",
"LoopOpt ",
"SsaLiveness ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 0b1a3ba..212edfb 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -80,6 +80,7 @@
kArenaAllocBoundsCheckElimination,
kArenaAllocDCE,
kArenaAllocLSE,
+ kArenaAllocCFRE,
kArenaAllocLICM,
kArenaAllocLoopOptimization,
kArenaAllocSsaLiveness,
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 0844678..87dac02 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -127,6 +127,14 @@
return (x < 2u) ? x : static_cast<T>(1u) << (std::numeric_limits<T>::digits - CLZ(x - 1u));
}
+// Return the largest power of two N such that N <= val, or 0 when val is 0.
+template <typename T>
+constexpr T TruncToPowerOfTwo(T val) {
+ static_assert(std::is_integral<T>::value, "T must be integral");
+ static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+ return (val != 0) ? static_cast<T>(1u) << (BitSizeOf<T>() - CLZ(val) - 1u) : 0;
+}
+
template<typename T>
constexpr bool IsPowerOfTwo(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
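TruncToPowerOfTwo keeps only the highest set bit: BitSizeOf<T>() - CLZ(val) - 1 is that bit's index. Worked through one of the test values below: val = 0x3aaaa has its top bit at index 17 (CLZ = 14 on 32 bits), so the result is 1 << 17 = 0x20000. The same computation with standard C++20 facilities, as a cross-check rather than the ART implementation:

    #include <bit>
    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t val = 0x3aaaau;
      assert(std::countl_zero(val) == 14);                             // CLZ
      uint32_t trunc = uint32_t{1} << (32 - std::countl_zero(val) - 1);
      assert(trunc == 0x20000u);
      assert(trunc == std::bit_floor(val));  // the standard name for this operation
      return 0;
    }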
diff --git a/runtime/base/bit_utils_test.cc b/runtime/base/bit_utils_test.cc
index 9f22fb4..c96c6dc 100644
--- a/runtime/base/bit_utils_test.cc
+++ b/runtime/base/bit_utils_test.cc
@@ -122,6 +122,32 @@
static_assert(33u == MinimumBitsToStore<uint64_t>(UINT64_C(0x1FFFFFFFF)), "TestMinBits2Store64#10");
static_assert(64u == MinimumBitsToStore<uint64_t>(~UINT64_C(0)), "TestMinBits2Store64#11");
+static_assert(0 == TruncToPowerOfTwo<uint32_t>(0u), "TestTruncToPowerOfTwo32#1");
+static_assert(1 == TruncToPowerOfTwo<uint32_t>(1u), "TestTruncToPowerOfTwo32#2");
+static_assert(2 == TruncToPowerOfTwo<uint32_t>(2u), "TestTruncToPowerOfTwo32#3");
+static_assert(2 == TruncToPowerOfTwo<uint32_t>(3u), "TestTruncToPowerOfTwo32#4");
+static_assert(4 == TruncToPowerOfTwo<uint32_t>(7u), "TestTruncToPowerOfTwo32#5");
+static_assert(0x20000u == TruncToPowerOfTwo<uint32_t>(0x3aaaau),
+ "TestTruncToPowerOfTwo32#6");
+static_assert(0x40000000u == TruncToPowerOfTwo<uint32_t>(0x40000001u),
+ "TestTruncToPowerOfTwo32#7");
+static_assert(0x80000000u == TruncToPowerOfTwo<uint32_t>(0x80000000u),
+ "TestTruncToPowerOfTwo32#8");
+
+static_assert(0 == TruncToPowerOfTwo<uint64_t>(UINT64_C(0)), "TestTruncToPowerOfTwo64#1");
+static_assert(1 == TruncToPowerOfTwo<uint64_t>(UINT64_C(1)), "TestTruncToPowerOfTwo64#2");
+static_assert(2 == TruncToPowerOfTwo<uint64_t>(UINT64_C(2)), "TestTruncToPowerOfTwo64#3");
+static_assert(2 == TruncToPowerOfTwo<uint64_t>(UINT64_C(3)), "TestTruncToPowerOfTwo64#4");
+static_assert(4 == TruncToPowerOfTwo<uint64_t>(UINT64_C(7)), "TestTruncToPowerOfTwo64#5");
+static_assert(UINT64_C(0x20000) == TruncToPowerOfTwo<uint64_t>(UINT64_C(0x3aaaa)),
+ "TestTruncToPowerOfTwo64#6");
+static_assert(
+ UINT64_C(0x4000000000000000) == TruncToPowerOfTwo<uint64_t>(UINT64_C(0x4000000000000001)),
+ "TestTruncToPowerOfTwo64#7");
+static_assert(
+ UINT64_C(0x8000000000000000) == TruncToPowerOfTwo<uint64_t>(UINT64_C(0x8000000000000000)),
+ "TestTruncToPowerOfTwo64#8");
+
static_assert(0 == RoundUpToPowerOfTwo<uint32_t>(0u), "TestRoundUpPowerOfTwo32#1");
static_assert(1 == RoundUpToPowerOfTwo<uint32_t>(1u), "TestRoundUpPowerOfTwo32#2");
static_assert(2 == RoundUpToPowerOfTwo<uint32_t>(2u), "TestRoundUpPowerOfTwo32#3");
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 77ce39c..4161754 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -7763,7 +7763,8 @@
// We have a valid method from the DexCache but we need to perform ICCE and IAE checks.
DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
klass = LookupResolvedType(dex_file, method_id.class_idx_, dex_cache.Get(), class_loader.Get());
- DCHECK(klass != nullptr);
+ CHECK(klass != nullptr) << resolved->PrettyMethod() << " " << resolved << " "
+ << resolved->GetAccessFlags();
} else {
// The method was not in the DexCache, resolve the declaring class.
klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
diff --git a/runtime/class_loader_context.cc b/runtime/class_loader_context.cc
index 07afedf..56573f5 100644
--- a/runtime/class_loader_context.cc
+++ b/runtime/class_loader_context.cc
@@ -632,6 +632,10 @@
}
}
+static bool IsAbsoluteLocation(const std::string& location) {
+ return !location.empty() && location[0] == '/';
+}
+
bool ClassLoaderContext::VerifyClassLoaderContextMatch(const std::string& context_spec) const {
ClassLoaderContext expected_context;
if (!expected_context.Parse(context_spec, /*parse_checksums*/ true)) {
@@ -673,18 +677,52 @@
DCHECK_EQ(expected_info.classpath.size(), expected_info.checksums.size());
for (size_t k = 0; k < info.classpath.size(); k++) {
- if (info.classpath[k] != expected_info.classpath[k]) {
+ // Compute the dex location that must be compared.
+ // We shouldn't do a naive comparison `info.classpath[k] == expected_info.classpath[k]`
+ // because even if they refer to the same file, one could be encoded as a relative location
+ // and the other as an absolute one.
+ bool is_dex_name_absolute = IsAbsoluteLocation(info.classpath[k]);
+ bool is_expected_dex_name_absolute = IsAbsoluteLocation(expected_info.classpath[k]);
+ std::string dex_name;
+ std::string expected_dex_name;
+
+ if (is_dex_name_absolute == is_expected_dex_name_absolute) {
+ // If both locations are absolute or relative then compare them as they are.
+ // This is usually the case for: shared libraries and secondary dex files.
+ dex_name = info.classpath[k];
+ expected_dex_name = expected_info.classpath[k];
+ } else if (is_dex_name_absolute) {
+ // The runtime name is absolute but the compiled name (the expected one) is relative.
+ // This is the case for split apks which depend on base or on other splits.
+ dex_name = info.classpath[k];
+ expected_dex_name = OatFile::ResolveRelativeEncodedDexLocation(
+ info.classpath[k].c_str(), expected_info.classpath[k]);
+ } else {
+ // The runtime name is relative but the compiled name is absolute.
+ // There is no expected use case that would end up here as dex files are always loaded
+ // with their absolute locations. However, be tolerant and make a best effort (in case
+ // there are unexpected new use cases).
+ DCHECK(is_expected_dex_name_absolute);
+ dex_name = OatFile::ResolveRelativeEncodedDexLocation(
+ expected_info.classpath[k].c_str(), info.classpath[k]);
+ expected_dex_name = expected_info.classpath[k];
+ }
+
+ // Compare the locations.
+ if (dex_name != expected_dex_name) {
LOG(WARNING) << "ClassLoaderContext classpath element mismatch for position " << i
<< ". expected=" << expected_info.classpath[k]
<< ", found=" << info.classpath[k]
<< " (" << context_spec << " | " << EncodeContextForOatFile("") << ")";
return false;
}
+
+ // Compare the checksums.
if (info.checksums[k] != expected_info.checksums[k]) {
LOG(WARNING) << "ClassLoaderContext classpath element checksum mismatch for position " << i
- << ". expected=" << expected_info.checksums[k]
- << ", found=" << info.checksums[k]
- << " (" << context_spec << " | " << EncodeContextForOatFile("") << ")";
+ << ". expected=" << expected_info.checksums[k]
+ << ", found=" << info.checksums[k]
+ << " (" << context_spec << " | " << EncodeContextForOatFile("") << ")";
return false;
}
}
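
The matching logic above boils down to: rebase the relative name onto the
directory of the absolute one before comparing. A standalone sketch of that
idea (ResolveRelativeSketch is a hypothetical stand-in for
OatFile::ResolveRelativeEncodedDexLocation, which also handles cases this
sketch ignores):

    #include <string>

    static bool IsAbsolute(const std::string& location) {
      return !location.empty() && location[0] == '/';
    }

    // Rebase a relative encoded location onto the directory of an absolute one.
    static std::string ResolveRelativeSketch(const std::string& abs_location,
                                             const std::string& rel_location) {
      size_t pos = abs_location.rfind('/');
      return (pos == std::string::npos)
          ? rel_location
          : abs_location.substr(0, pos + 1) + rel_location;
    }

    static bool LocationsMatch(const std::string& a, const std::string& b) {
      if (IsAbsolute(a) == IsAbsolute(b)) {
        return a == b;  // Both relative or both absolute: compare as-is.
      }
      // One is absolute, the other relative: resolve, then compare.
      return IsAbsolute(a) ? a == ResolveRelativeSketch(a, b)
                           : ResolveRelativeSketch(b, a) == b;
    }

For example, LocationsMatch("/data/app/foo/base.apk", "base.apk") is true,
which is exactly the split-apk case described in the comments above.
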
diff --git a/runtime/class_loader_context_test.cc b/runtime/class_loader_context_test.cc
index ddbb73b..1847274 100644
--- a/runtime/class_loader_context_test.cc
+++ b/runtime/class_loader_context_test.cc
@@ -697,7 +697,17 @@
std::unique_ptr<ClassLoaderContext> context = CreateContextForClassLoader(class_loader_d);
- ASSERT_TRUE(context->VerifyClassLoaderContextMatch(context->EncodeContextForOatFile("")));
+ std::string context_with_no_base_dir = context->EncodeContextForOatFile("");
+ ASSERT_TRUE(context->VerifyClassLoaderContextMatch(context_with_no_base_dir));
+
+ std::string dex_location = GetTestDexFileName("ForClassLoaderA");
+ size_t pos = dex_location.rfind('/');
+ ASSERT_NE(std::string::npos, pos);
+ std::string parent = dex_location.substr(0, pos);
+
+ std::string context_with_base_dir = context->EncodeContextForOatFile(parent);
+ ASSERT_NE(context_with_base_dir, context_with_no_base_dir);
+ ASSERT_TRUE(context->VerifyClassLoaderContextMatch(context_with_base_dir));
}
TEST_F(ClassLoaderContextTest, VerifyClassLoaderContextMatchAfterEncodingMultidex) {
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 53f0727..5f40711 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -73,7 +73,11 @@
// Before deoptimizing to interpreter, we must push the deoptimization context.
JValue return_value;
return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result.
- self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
+ self->PushDeoptimizationContext(return_value,
+ false /* is_reference */,
+ self->GetException(),
+ true /* from_code */,
+ DeoptimizationMethodType::kDefault);
artDeoptimizeImpl(self, kind, true);
}
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index c6abd28..7b83f20 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -744,7 +744,11 @@
ObjPtr<mirror::Throwable> pending_exception;
bool from_code = false;
- self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+ DeoptimizationMethodType method_type;
+ self->PopDeoptimizationContext(/* out */ &result,
+ /* out */ &pending_exception,
+ /* out */ &from_code,
+ /* out */ &method_type);
// Push a transition back into managed code onto the linked list in thread.
self->PushManagedStackFragment(&fragment);
@@ -771,7 +775,11 @@
if (pending_exception != nullptr) {
self->SetException(pending_exception);
}
- interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
+ interpreter::EnterInterpreterFromDeoptimize(self,
+ deopt_frame,
+ &result,
+ from_code,
+ DeoptimizationMethodType::kDefault);
} else {
const char* old_cause = self->StartAssertNoThreadSuspension(
"Building interpreter shadow frame");
@@ -823,7 +831,11 @@
// Push the context of the deoptimization stack so we can restore the return value and the
// exception before executing the deoptimized frames.
self->PushDeoptimizationContext(
- result, shorty[0] == 'L', /* from_code */ false, self->GetException());
+ result,
+ shorty[0] == 'L' || shorty[0] == '[', /* class or array */
+ self->GetException(),
+ false /* from_code */,
+ DeoptimizationMethodType::kDefault);
// Set special exception to cause deoptimization.
self->SetException(Thread::GetDeoptimizationException());
@@ -1041,7 +1053,8 @@
CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
<< self->GetException()->Dump();
// Compute address of return PC and sanity check that it currently holds 0.
- size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
+ size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA,
+ CalleeSaveType::kSaveEverything);
uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
return_pc_offset);
CHECK_EQ(*return_pc, 0U);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 6e457a4..4d8c687 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -26,11 +26,13 @@
#include "class_linker.h"
#include "debugger.h"
#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
#include "entrypoints/quick/quick_alloc_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/runtime_asm_entrypoints.h"
#include "gc_root-inl.h"
#include "interpreter/interpreter.h"
+#include "interpreter/interpreter_common.h"
#include "jit/jit.h"
#include "jit/jit_code_cache.h"
#include "jvalue-inl.h"
@@ -229,39 +231,32 @@
return true; // Continue.
}
uintptr_t return_pc = GetReturnPc();
- if (m->IsRuntimeMethod()) {
- if (return_pc == instrumentation_exit_pc_) {
- if (kVerboseInstrumentation) {
- LOG(INFO) << " Handling quick to interpreter transition. Frame " << GetFrameId();
- }
- CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size());
- const InstrumentationStackFrame& frame =
- instrumentation_stack_->at(instrumentation_stack_depth_);
- CHECK(frame.interpreter_entry_);
- // This is an interpreter frame so method enter event must have been reported. However we
- // need to push a DEX pc into the dex_pcs_ list to match size of instrumentation stack.
- // Since we won't report method entry here, we can safely push any DEX pc.
- dex_pcs_.push_back(0);
- last_return_pc_ = frame.return_pc_;
- ++instrumentation_stack_depth_;
- return true;
- } else {
- if (kVerboseInstrumentation) {
- LOG(INFO) << " Skipping runtime method. Frame " << GetFrameId();
- }
- last_return_pc_ = GetReturnPc();
- return true; // Ignore unresolved methods since they will be instrumented after resolution.
- }
- }
if (kVerboseInstrumentation) {
LOG(INFO) << " Installing exit stub in " << DescribeLocation();
}
if (return_pc == instrumentation_exit_pc_) {
+ CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size());
+
+ if (m->IsRuntimeMethod()) {
+ const InstrumentationStackFrame& frame =
+ instrumentation_stack_->at(instrumentation_stack_depth_);
+ if (frame.interpreter_entry_) {
+ // This instrumentation frame is for an interpreter bridge and is pushed
+ // when executing the instrumented interpreter bridge. So the method enter
+ // event must have been reported. However, we need to push a DEX pc into
+ // the dex_pcs_ list to match the size of the instrumentation stack.
+ uint32_t dex_pc = DexFile::kDexNoIndex;
+ dex_pcs_.push_back(dex_pc);
+ last_return_pc_ = frame.return_pc_;
+ ++instrumentation_stack_depth_;
+ return true;
+ }
+ }
+
// We've reached a frame which has already been installed with instrumentation exit stub.
// We should have already installed instrumentation on previous frames.
reached_existing_instrumentation_frames_ = true;
- CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size());
const InstrumentationStackFrame& frame =
instrumentation_stack_->at(instrumentation_stack_depth_);
CHECK_EQ(m, frame.method_) << "Expected " << ArtMethod::PrettyMethod(m)
@@ -273,8 +268,12 @@
} else {
CHECK_NE(return_pc, 0U);
CHECK(!reached_existing_instrumentation_frames_);
- InstrumentationStackFrame instrumentation_frame(GetThisObject(), m, return_pc, GetFrameId(),
- false);
+ InstrumentationStackFrame instrumentation_frame(
+ m->IsRuntimeMethod() ? nullptr : GetThisObject(),
+ m,
+ return_pc,
+ GetFrameId(), // A runtime method still gets a frame id.
+ false);
if (kVerboseInstrumentation) {
LOG(INFO) << "Pushing frame " << instrumentation_frame.Dump();
}
@@ -291,9 +290,12 @@
instrumentation_stack_->insert(it, instrumentation_frame);
SetReturnPc(instrumentation_exit_pc_);
}
- dex_pcs_.push_back((GetCurrentOatQuickMethodHeader() == nullptr)
- ? DexFile::kDexNoIndex
- : GetCurrentOatQuickMethodHeader()->ToDexPc(m, last_return_pc_));
+ uint32_t dex_pc = DexFile::kDexNoIndex;
+ if (last_return_pc_ != 0 &&
+ GetCurrentOatQuickMethodHeader() != nullptr) {
+ dex_pc = GetCurrentOatQuickMethodHeader()->ToDexPc(m, last_return_pc_);
+ }
+ dex_pcs_.push_back(dex_pc);
last_return_pc_ = return_pc;
++instrumentation_stack_depth_;
return true; // Continue.
@@ -391,7 +393,8 @@
CHECK(m == instrumentation_frame.method_) << ArtMethod::PrettyMethod(m);
}
SetReturnPc(instrumentation_frame.return_pc_);
- if (instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
+ if (instrumentation_->ShouldNotifyMethodEnterExitEvents() &&
+ !m->IsRuntimeMethod()) {
// Create the method exit events. As the methods didn't really exit the result is 0.
// We only do this if no debugger is attached to prevent from posting events twice.
instrumentation_->MethodExitEvent(thread_, instrumentation_frame.this_object_, m,
@@ -969,6 +972,7 @@
ObjPtr<mirror::Object> this_object,
ArtMethod* method,
uint32_t dex_pc) const {
+ DCHECK(!method->IsRuntimeMethod());
if (HasMethodEntryListeners()) {
Thread* self = Thread::Current();
StackHandleScope<1> hs(self);
@@ -1199,6 +1203,66 @@
stack->push_front(instrumentation_frame);
}
+DeoptimizationMethodType Instrumentation::GetDeoptimizationMethodType(ArtMethod* method) {
+ if (method->IsRuntimeMethod()) {
+ // Certain methods have strict requirements on whether the dex instruction
+ // should be re-executed upon deoptimization.
+ if (method == Runtime::Current()->GetCalleeSaveMethod(
+ CalleeSaveType::kSaveEverythingForClinit)) {
+ return DeoptimizationMethodType::kKeepDexPc;
+ }
+ if (method == Runtime::Current()->GetCalleeSaveMethod(
+ CalleeSaveType::kSaveEverythingForSuspendCheck)) {
+ return DeoptimizationMethodType::kKeepDexPc;
+ }
+ }
+ return DeoptimizationMethodType::kDefault;
+}
+
+// Try to get the shorty of a runtime method if it's an invocation stub.
+struct RuntimeMethodShortyVisitor : public StackVisitor {
+ explicit RuntimeMethodShortyVisitor(Thread* thread)
+ : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+ shorty('V') {}
+
+ bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
+ ArtMethod* m = GetMethod();
+ if (m != nullptr && !m->IsRuntimeMethod()) {
+ // The first Java method.
+ if (m->IsNative()) {
+ // Use the JNI method's shorty for the JNI stub.
+ shorty = m->GetShorty()[0];
+ return false;
+ }
+ if (m->IsProxyMethod()) {
+ // A proxy method just invokes its proxied method via
+ // art_quick_proxy_invoke_handler.
+ shorty = m->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty()[0];
+ return false;
+ }
+ const DexFile::CodeItem* code_item = m->GetCodeItem();
+ const Instruction* instr = Instruction::At(&code_item->insns_[GetDexPc()]);
+ if (instr->IsInvoke()) {
+ const DexFile* dex_file = m->GetDexFile();
+ if (interpreter::IsStringInit(dex_file, instr->VRegB())) {
+ // Invoking string init constructor is turned into invoking
+ // StringFactory.newStringFromChars() which returns a string.
+ shorty = 'L';
+ return false;
+ }
+ // A regular invoke, use callee's shorty.
+ uint32_t method_idx = instr->VRegB();
+ shorty = dex_file->GetMethodShorty(method_idx)[0];
+ }
+ // Stop stack walking since we've seen a Java frame.
+ return false;
+ }
+ return true;
+ }
+
+ char shorty;
+};
+
TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
uintptr_t* return_pc,
uint64_t* gpr_result,
@@ -1219,7 +1283,36 @@
ArtMethod* method = instrumentation_frame.method_;
uint32_t length;
const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
- char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
+ char return_shorty;
+
+ // A runtime method does not call into MethodExitEvent(), so there should
+ // be no suspension point below.
+ ScopedAssertNoThreadSuspension ants(__FUNCTION__, method->IsRuntimeMethod());
+ if (method->IsRuntimeMethod()) {
+ if (method != Runtime::Current()->GetCalleeSaveMethod(
+ CalleeSaveType::kSaveEverythingForClinit)) {
+ // If the caller is at an invocation point and the runtime method is not
+ // for clinit, the return result must be passed back to the caller. The
+ // correct shorty is needed to decide whether to pass the return result
+ // for deoptimization below.
+ RuntimeMethodShortyVisitor visitor(self);
+ visitor.WalkStack();
+ return_shorty = visitor.shorty;
+ } else {
+ // Some runtime methods such as allocations, unresolved field getters, etc.
+ // have return values. We don't need to set return_value since MethodExitEvent()
+ // below isn't called for runtime methods. Deoptimization doesn't need the
+ // value either since the dex instruction will be re-executed by the
+ // interpreter, except these two cases:
+ // (1) For an invoke, which is handled above to get the correct shorty.
+ // (2) For MONITOR_ENTER/EXIT, which cannot be re-executed since it's not
+ // idempotent. However there is no return value for it anyway.
+ return_shorty = 'V';
+ }
+ } else {
+ return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
+ }
+
bool is_ref = return_shorty == '[' || return_shorty == 'L';
StackHandleScope<1> hs(self);
MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
@@ -1239,7 +1332,7 @@
// return_pc.
uint32_t dex_pc = DexFile::kDexNoIndex;
mirror::Object* this_object = instrumentation_frame.this_object_;
- if (!instrumentation_frame.interpreter_entry_) {
+ if (!method->IsRuntimeMethod() && !instrumentation_frame.interpreter_entry_) {
MethodExitEvent(self, this_object, instrumentation_frame.method_, dex_pc, return_value);
}
@@ -1265,10 +1358,12 @@
<< " in "
<< *self;
}
+ DeoptimizationMethodType deopt_method_type = GetDeoptimizationMethodType(method);
self->PushDeoptimizationContext(return_value,
- return_shorty == 'L',
+ return_shorty == 'L' || return_shorty == '[',
+ nullptr /* no pending exception */,
false /* from_code */,
- nullptr /* no pending exception */);
+ deopt_method_type);
return GetTwoWordSuccessValue(*return_pc,
reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
} else {
@@ -1305,7 +1400,9 @@
// TODO: improve the dex pc information here, requires knowledge of current PC as opposed to
// return_pc.
uint32_t dex_pc = DexFile::kDexNoIndex;
- MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
+ if (!method->IsRuntimeMethod()) {
+ MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
+ }
}
// TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
CHECK_EQ(stack->size(), idx);
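
A recurring detail in this file's changes is the reference-vs-primitive
decision made from a shorty character. As a sketch of that predicate: dex
shortys encode every reference type with 'L' (arrays are collapsed to 'L' as
well), so the added '[' check is best read as defensive:

    // Sketch: decide whether a return value described by a shorty character
    // is a reference. Primitives use 'Z','B','C','S','I','J','F','D'; 'V' is void.
    static inline bool IsReferenceShorty(char return_shorty) {
      return return_shorty == 'L' || return_shorty == '[';
    }
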
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index fec027e..5763a41 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -40,6 +40,7 @@
union JValue;
class ShadowFrame;
class Thread;
+enum class DeoptimizationMethodType;
namespace instrumentation {
@@ -474,6 +475,9 @@
bool interpreter_entry)
REQUIRES_SHARED(Locks::mutator_lock_);
+ DeoptimizationMethodType GetDeoptimizationMethodType(ArtMethod* method)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
// Called when an instrumented method is exited. Removes the pushed instrumentation frame
// returning the intended link register. Generates method exit events. The gpr_result and
// fpr_result pointers are pointers to the locations where the integer/pointer and floating point
@@ -711,9 +715,15 @@
// An element in the instrumentation side stack maintained in art::Thread.
struct InstrumentationStackFrame {
- InstrumentationStackFrame(mirror::Object* this_object, ArtMethod* method,
- uintptr_t return_pc, size_t frame_id, bool interpreter_entry)
- : this_object_(this_object), method_(method), return_pc_(return_pc), frame_id_(frame_id),
+ InstrumentationStackFrame(mirror::Object* this_object,
+ ArtMethod* method,
+ uintptr_t return_pc,
+ size_t frame_id,
+ bool interpreter_entry)
+ : this_object_(this_object),
+ method_(method),
+ return_pc_(return_pc),
+ frame_id_(frame_id),
interpreter_entry_(interpreter_entry) {
}
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 9b77d12..89baa35 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -514,7 +514,23 @@
// Test instrumentation listeners for each event.
TEST_F(InstrumentationTest, MethodEntryEvent) {
- TestEvent(instrumentation::Instrumentation::kMethodEntered);
+ ScopedObjectAccess soa(Thread::Current());
+ jobject class_loader = LoadDex("Instrumentation");
+ Runtime* const runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ StackHandleScope<1> hs(soa.Self());
+ Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+ mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+ ASSERT_TRUE(klass != nullptr);
+ ArtMethod* method =
+ klass->FindClassMethod("returnReference", "()Ljava/lang/Object;", kRuntimePointerSize);
+ ASSERT_TRUE(method != nullptr);
+ ASSERT_TRUE(method->IsDirect());
+ ASSERT_TRUE(method->GetDeclaringClass() == klass);
+ TestEvent(instrumentation::Instrumentation::kMethodEntered,
+ /*event_method*/ method,
+ /*event_field*/ nullptr,
+ /*with_object*/ true);
}
TEST_F(InstrumentationTest, MethodExitObjectEvent) {
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 3349833..a1f2123 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -467,29 +467,6 @@
self->PopShadowFrame();
}
-static bool IsStringInit(const Instruction* instr, ArtMethod* caller)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (instr->Opcode() == Instruction::INVOKE_DIRECT ||
- instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) {
- // Instead of calling ResolveMethod() which has suspend point and can trigger
- // GC, look up the callee method symbolically.
- uint16_t callee_method_idx = (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
- instr->VRegB_3rc() : instr->VRegB_35c();
- const DexFile* dex_file = caller->GetDexFile();
- const DexFile::MethodId& method_id = dex_file->GetMethodId(callee_method_idx);
- const char* class_name = dex_file->StringByTypeIdx(method_id.class_idx_);
- const char* method_name = dex_file->GetMethodName(method_id);
- // Compare method's class name and method name against string init.
- // It's ok since it's not allowed to create your own java/lang/String.
- // TODO: verify that assumption.
- if ((strcmp(class_name, "Ljava/lang/String;") == 0) &&
- (strcmp(method_name, "<init>") == 0)) {
- return true;
- }
- }
- return false;
-}
-
static int16_t GetReceiverRegisterForStringInit(const Instruction* instr) {
DCHECK(instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE ||
instr->Opcode() == Instruction::INVOKE_DIRECT);
@@ -499,8 +476,9 @@
void EnterInterpreterFromDeoptimize(Thread* self,
ShadowFrame* shadow_frame,
+ JValue* ret_val,
bool from_code,
- JValue* ret_val)
+ DeoptimizationMethodType deopt_method_type)
REQUIRES_SHARED(Locks::mutator_lock_) {
JValue value;
// Set value to last known result in case the shadow frame chain is empty.
@@ -525,11 +503,27 @@
new_dex_pc = MoveToExceptionHandler(
self, *shadow_frame, instrumentation) ? shadow_frame->GetDexPC() : DexFile::kDexNoIndex;
} else if (!from_code) {
- // For the debugger and full deoptimization stack, we must go past the invoke
- // instruction, as it already executed.
- // TODO: should be tested more once b/17586779 is fixed.
+ // Deoptimization is not called from code directly.
const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
- if (instr->IsInvoke()) {
+ if (deopt_method_type == DeoptimizationMethodType::kKeepDexPc) {
+ DCHECK(first);
+ // Need to re-execute the dex instruction.
+ // (1) An invocation might be split into class initialization and invoke.
+ // In this case, the invoke should not be skipped.
+ // (2) A suspend check should also execute the dex instruction at the
+ // corresponding dex pc.
+ DCHECK_EQ(new_dex_pc, dex_pc);
+ } else if (instr->Opcode() == Instruction::MONITOR_ENTER ||
+ instr->Opcode() == Instruction::MONITOR_EXIT) {
+ DCHECK(deopt_method_type == DeoptimizationMethodType::kDefault);
+ DCHECK(first);
+ // Non-idempotent dex instruction should not be re-executed.
+ // On the other hand, if a MONITOR_ENTER is at the dex_pc of a suspend
+ // check, that MONITOR_ENTER should be executed. That case is handled
+ // above.
+ new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+ } else if (instr->IsInvoke()) {
+ DCHECK(deopt_method_type == DeoptimizationMethodType::kDefault);
if (IsStringInit(instr, shadow_frame->GetMethod())) {
uint16_t this_obj_vreg = GetReceiverRegisterForStringInit(instr);
// Move the StringFactory.newStringFromChars() result into the register representing
@@ -542,30 +536,27 @@
}
new_dex_pc = dex_pc + instr->SizeInCodeUnits();
} else if (instr->Opcode() == Instruction::NEW_INSTANCE) {
- // It's possible to deoptimize at a NEW_INSTANCE dex instruciton that's for a
- // java string, which is turned into a call into StringFactory.newEmptyString();
- // Move the StringFactory.newEmptyString() result into the destination register.
- DCHECK(value.GetL()->IsString());
- shadow_frame->SetVRegReference(instr->VRegA_21c(), value.GetL());
- // new-instance doesn't generate a result value.
- value.SetJ(0);
- // Skip the dex instruction since we essentially come back from an invocation.
- new_dex_pc = dex_pc + instr->SizeInCodeUnits();
- if (kIsDebugBuild) {
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- // This is a suspend point. But it's ok since value has been set into shadow_frame.
- ObjPtr<mirror::Class> klass = class_linker->ResolveType(
- dex::TypeIndex(instr->VRegB_21c()), shadow_frame->GetMethod());
- DCHECK(klass->IsStringClass());
- }
+ // A NEW_INSTANCE is simply re-executed, including
+ // "new-instance String" which is compiled into a call into
+ // StringFactory.newEmptyString().
+ DCHECK_EQ(new_dex_pc, dex_pc);
} else {
- CHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
- << " at dex_pc " << dex_pc
- << " of method: " << ArtMethod::PrettyMethod(shadow_frame->GetMethod(), false);
+ DCHECK(deopt_method_type == DeoptimizationMethodType::kDefault);
+ DCHECK(first);
+ // By default, we re-execute the dex instruction: since it is not an
+ // invoke, we don't have to decode it to move a result into the right
+ // vreg. All slow paths have been audited to be idempotent except
+ // monitor-enter/exit and invocation stubs.
+ // TODO: move result and advance dex pc. That also requires that we
+ // can tell the return type of a runtime method, possibly by decoding
+ // the dex instruction at the caller.
+ DCHECK_EQ(new_dex_pc, dex_pc);
}
} else {
// Nothing to do, the dex_pc is the one at which the code requested
// the deoptimization.
+ DCHECK(first);
+ DCHECK_EQ(new_dex_pc, dex_pc);
}
if (new_dex_pc != DexFile::kDexNoIndex) {
shadow_frame->SetDexPC(new_dex_pc);
@@ -574,8 +565,10 @@
ShadowFrame* old_frame = shadow_frame;
shadow_frame = shadow_frame->GetLink();
ShadowFrame::DeleteDeoptimizedFrame(old_frame);
- // Following deoptimizations of shadow frames must pass the invoke instruction.
+ // Following deoptimizations of shadow frames must be at an invocation
+ // point and should advance the dex pc past the invoke instruction.
from_code = false;
+ deopt_method_type = DeoptimizationMethodType::kDefault;
first = false;
}
ret_val->SetJ(value.GetJ());
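
The dex-pc handling above can be summarized as a small decision table: keep
the dex pc for kKeepDexPc (clinit-split invokes and suspend checks), advance
past instructions that must not be re-executed (invokes, after moving the
result, and monitor-enter/exit), and re-execute everything else. A reduced
standalone model of that decision (simplified types, not ART's API):

    #include <cstdint>

    enum class DeoptMethodTypeSketch { kKeepDexPc, kDefault };

    // is_invoke / is_monitor_op stand in for decoding the instruction at dex_pc.
    uint32_t NextDexPc(uint32_t dex_pc,
                       uint32_t insn_size_in_code_units,
                       bool is_invoke,
                       bool is_monitor_op,
                       DeoptMethodTypeSketch type) {
      if (type == DeoptMethodTypeSketch::kKeepDexPc) {
        return dex_pc;  // Re-execute the instruction at the same dex pc.
      }
      if (is_invoke || is_monitor_op) {
        return dex_pc + insn_size_in_code_units;  // Skip: not safely re-executable.
      }
      return dex_pc;  // Idempotent slow paths are simply re-executed.
    }
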
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 65cfade..df8568e 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -30,6 +30,7 @@
union JValue;
class ShadowFrame;
class Thread;
+enum class DeoptimizationMethodType;
namespace interpreter {
@@ -44,8 +45,11 @@
REQUIRES_SHARED(Locks::mutator_lock_);
// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
- JValue* ret_val)
+extern void EnterInterpreterFromDeoptimize(Thread* self,
+ ShadowFrame* shadow_frame,
+ JValue* ret_val,
+ bool from_code,
+ DeoptimizationMethodType method_type)
REQUIRES_SHARED(Locks::mutator_lock_);
extern JValue EnterInterpreterFromEntryPoint(Thread* self, const DexFile::CodeItem* code_item,
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 82e12f5..3ccab85 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -537,6 +537,34 @@
uint16_t arg_offset,
JValue* result);
+static inline bool IsStringInit(const DexFile* dex_file, uint32_t method_idx)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ const DexFile::MethodId& method_id = dex_file->GetMethodId(method_idx);
+ const char* class_name = dex_file->StringByTypeIdx(method_id.class_idx_);
+ const char* method_name = dex_file->GetMethodName(method_id);
+ // Instead of calling ResolveMethod() which has a suspend point and can trigger
+ // GC, look up the method symbolically.
+ // Compare method's class name and method name against string init.
+ // It's ok since it's not allowed to create your own java/lang/String.
+ // TODO: verify that assumption.
+ if ((strcmp(class_name, "Ljava/lang/String;") == 0) &&
+ (strcmp(method_name, "<init>") == 0)) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool IsStringInit(const Instruction* instr, ArtMethod* caller)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (instr->Opcode() == Instruction::INVOKE_DIRECT ||
+ instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) {
+ uint16_t callee_method_idx = (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+ instr->VRegB_3rc() : instr->VRegB_35c();
+ return IsStringInit(caller->GetDexFile(), callee_method_idx);
+ }
+ return false;
+}
+
// Set string value created from StringFactory.newStringFromXXX() into all aliases of
// StringFactory.newEmptyString().
void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 5c63dca..80e6ad3 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1498,13 +1498,21 @@
break;
case LockWord::kThinLocked:
owner_ = Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+ DCHECK(owner_ != nullptr) << "Thin-locked without owner!";
entry_count_ = 1 + lock_word.ThinLockCount();
// Thin locks have no waiters.
break;
case LockWord::kFatLocked: {
Monitor* mon = lock_word.FatLockMonitor();
owner_ = mon->owner_;
- entry_count_ = 1 + mon->lock_count_;
+ // Here it is okay for the owner to be null since we don't reset the LockWord back to
+ // kUnlocked until we get a GC. In cases where this hasn't happened yet we will have a fat
+ // lock without an owner.
+ if (owner_ != nullptr) {
+ entry_count_ = 1 + mon->lock_count_;
+ } else {
+ DCHECK_EQ(mon->lock_count_, 0) << "Monitor is fat-locked without any owner!";
+ }
for (Thread* waiter = mon->wait_set_; waiter != nullptr; waiter = waiter->GetWaitNext()) {
waiters_.push_back(waiter);
}
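
The entry-count logic above now distinguishes three states. A compact sketch
of the rule (simplified stand-in, not ART's LockWord/Monitor types):

    #include <cstdint>

    enum class LockStateSketch { kUnlocked, kThinLocked, kFatLocked };

    // Returns the reentrancy count visible to GetObjectMonitorUsage-style
    // queries: thin locks encode a zero-based count in the lock word; fat
    // locks may transiently have no owner, in which case the count reads 0.
    uint32_t EntryCountSketch(LockStateSketch state,
                              uint32_t thin_lock_count,
                              bool fat_lock_has_owner,
                              uint32_t fat_lock_count) {
      switch (state) {
        case LockStateSketch::kThinLocked:
          return 1 + thin_lock_count;
        case LockStateSketch::kFatLocked:
          return fat_lock_has_owner ? 1 + fat_lock_count : 0;
        default:
          return 0;
      }
    }
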
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a8ccf89..a67a6aa 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -417,6 +417,12 @@
return;
}
Thread* self = Thread::Current();
+
+ // Dump all threads first and then the aborting thread. While this is counter to the logical flow,
+ // it improves the chance of relevant data surviving in the Android logs.
+
+ DumpAllThreads(os, self);
+
if (self == nullptr) {
os << "(Aborting thread was not attached to runtime!)\n";
DumpKernelStack(os, GetTid(), " kernel: ", false);
@@ -432,7 +438,6 @@
}
}
}
- DumpAllThreads(os, self);
}
// No thread-safety analysis as we do explicitly test for holding the mutator lock.
diff --git a/runtime/runtime_common.cc b/runtime/runtime_common.cc
index 940e461..f8e9442 100644
--- a/runtime/runtime_common.cc
+++ b/runtime/runtime_common.cc
@@ -416,8 +416,19 @@
<< "Cmdline: " << cmd_line << std::endl
<< "Thread: " << tid << " \"" << thread_name << "\"" << std::endl
<< "Registers:\n" << Dumpable<UContext>(thread_context) << std::endl
- << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace) << std::endl;
- stream << std::flush;
+ << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace);
+ if (signal_number == SIGILL) {
+ // Note the view we present is from the d-cache, which should
+ // match the i-cache if all is well.
+ static const size_t kCodeSnippetBytes = 16;
+ stream << "Code:\n\t" << info->si_addr << ":";
+ uintptr_t start = reinterpret_cast<uintptr_t>(info->si_addr);
+ uintptr_t end = std::min(start + kCodeSnippetBytes, RoundUp(start, kPageSize));
+ for (uintptr_t addr = start; addr != end; ++addr) {
+ stream << StringPrintf(" %02x", *(reinterpret_cast<const uint8_t*>(addr)));
+ }
+ }
+ stream << std::endl << std::flush;
};
if (dump_on_stderr) {
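
The clamp in the SIGILL dump above deliberately stops at the next page
boundary so the handler never touches a possibly-unmapped page; note that a
fault at a page-aligned address therefore dumps nothing. A worked miniature
of the clamp (kPageSize assumed to be 4096 here):

    #include <algorithm>
    #include <cstdint>

    constexpr uintptr_t kPageSizeSketch = 4096;

    constexpr uintptr_t RoundUpSketch(uintptr_t x, uintptr_t n) {
      return (x + n - 1) & ~(n - 1);  // n must be a power of two.
    }

    // Dump at most 16 bytes, but never cross into the next page.
    constexpr uintptr_t DumpEnd(uintptr_t start) {
      return std::min(start + 16, RoundUpSketch(start, kPageSizeSketch));
    }

    static_assert(DumpEnd(0x1000ffa) - 0x1000ffa == 6, "clamped at page end");
    static_assert(DumpEnd(0x1000800) - 0x1000800 == 16, "full snippet mid-page");
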
diff --git a/runtime/runtime_common.h b/runtime/runtime_common.h
index 06d6627..1248fe0 100644
--- a/runtime/runtime_common.h
+++ b/runtime/runtime_common.h
@@ -30,6 +30,7 @@
#include <iomanip>
+#include "base/bit_utils.h"
#include "base/dumpable.h"
#include "native_stack_dump.h"
#include "utils.h"
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3f23926..57b3a75 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -166,11 +166,13 @@
bool is_reference,
bool from_code,
ObjPtr<mirror::Throwable> pending_exception,
+ DeoptimizationMethodType method_type,
DeoptimizationContextRecord* link)
: ret_val_(ret_val),
is_reference_(is_reference),
from_code_(from_code),
pending_exception_(pending_exception.Ptr()),
+ deopt_method_type_(method_type),
link_(link) {}
JValue GetReturnValue() const { return ret_val_; }
@@ -185,6 +187,9 @@
mirror::Object** GetPendingExceptionAsGCRoot() {
return reinterpret_cast<mirror::Object**>(&pending_exception_);
}
+ DeoptimizationMethodType GetDeoptimizationMethodType() const {
+ return deopt_method_type_;
+ }
private:
// The value returned by the method at the top of the stack before deoptimization.
@@ -200,6 +205,9 @@
// exception).
mirror::Throwable* pending_exception_;
+ // The type of the method that triggered this deoptimization (see DeoptimizationMethodType).
+ const DeoptimizationMethodType deopt_method_type_;
+
// A link to the previous DeoptimizationContextRecord.
DeoptimizationContextRecord* const link_;
@@ -229,26 +237,30 @@
void Thread::PushDeoptimizationContext(const JValue& return_value,
bool is_reference,
+ ObjPtr<mirror::Throwable> exception,
bool from_code,
- ObjPtr<mirror::Throwable> exception) {
+ DeoptimizationMethodType method_type) {
DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
return_value,
is_reference,
from_code,
exception,
+ method_type,
tlsPtr_.deoptimization_context_stack);
tlsPtr_.deoptimization_context_stack = record;
}
void Thread::PopDeoptimizationContext(JValue* result,
ObjPtr<mirror::Throwable>* exception,
- bool* from_code) {
+ bool* from_code,
+ DeoptimizationMethodType* method_type) {
AssertHasDeoptimizationContext();
DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
tlsPtr_.deoptimization_context_stack = record->GetLink();
result->SetJ(record->GetReturnValue().GetJ());
*exception = record->GetPendingException();
*from_code = record->GetFromCode();
+ *method_type = record->GetDeoptimizationMethodType();
delete record;
}
@@ -3084,10 +3096,16 @@
NthCallerVisitor visitor(this, 0, false);
visitor.WalkStack();
if (Runtime::Current()->IsAsyncDeoptimizeable(visitor.caller_pc)) {
+ // method_type shouldn't matter due to exception handling.
+ const DeoptimizationMethodType method_type = DeoptimizationMethodType::kDefault;
// Save the exception into the deoptimization context so it can be restored
// before entering the interpreter.
PushDeoptimizationContext(
- JValue(), /*is_reference */ false, /* from_code */ false, exception);
+ JValue(),
+ false /* is_reference */,
+ exception,
+ false /* from_code */,
+ method_type);
artDeoptimize(this);
UNREACHABLE();
} else {
@@ -3647,7 +3665,8 @@
PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
ObjPtr<mirror::Throwable> pending_exception;
bool from_code = false;
- PopDeoptimizationContext(result, &pending_exception, &from_code);
+ DeoptimizationMethodType method_type;
+ PopDeoptimizationContext(result, &pending_exception, &from_code, &method_type);
SetTopOfStack(nullptr);
SetTopOfShadowStack(shadow_frame);
@@ -3656,7 +3675,11 @@
if (pending_exception != nullptr) {
SetException(pending_exception);
}
- interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
+ interpreter::EnterInterpreterFromDeoptimize(this,
+ shadow_frame,
+ result,
+ from_code,
+ method_type);
}
void Thread::SetException(ObjPtr<mirror::Throwable> new_exception) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 7540fd2..ad4506e 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -117,6 +117,13 @@
kDeoptimizationShadowFrame,
};
+// The type of method that triggers deoptimization. It contains info on whether
+// the deoptimized method should advance dex_pc.
+enum class DeoptimizationMethodType {
+ kKeepDexPc, // dex pc is required to be kept upon deoptimization.
+ kDefault // dex pc may or may not advance depending on other conditions.
+};
+
// This should match RosAlloc::kNumThreadLocalSizeBrackets.
static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16;
@@ -960,14 +967,18 @@
// values on stacks.
// 'from_code' denotes whether the deoptimization was explicitly made from
// compiled code.
+ // 'method_type' contains info on whether deoptimization should advance
+ // dex_pc.
void PushDeoptimizationContext(const JValue& return_value,
bool is_reference,
+ ObjPtr<mirror::Throwable> exception,
bool from_code,
- ObjPtr<mirror::Throwable> exception)
+ DeoptimizationMethodType method_type)
REQUIRES_SHARED(Locks::mutator_lock_);
void PopDeoptimizationContext(JValue* result,
ObjPtr<mirror::Throwable>* exception,
- bool* from_code)
+ bool* from_code,
+ DeoptimizationMethodType* method_type)
REQUIRES_SHARED(Locks::mutator_lock_);
void AssertHasDeoptimizationContext()
REQUIRES_SHARED(Locks::mutator_lock_);
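
Taken together, the thread.h changes model the deoptimization context as a
per-thread LIFO of records, each now carrying the method type alongside the
return value, exception, and from_code flag. A reduced model of the push/pop
pairing (field set trimmed to the two flags; not ART's types):

    enum class DeoptMethodTypeSketch { kKeepDexPc, kDefault };

    struct DeoptRecordSketch {
      bool from_code;
      DeoptMethodTypeSketch method_type;
      DeoptRecordSketch* link;  // Previous record (LIFO).
    };

    struct DeoptStackSketch {
      DeoptRecordSketch* top = nullptr;

      void Push(bool from_code, DeoptMethodTypeSketch type) {
        top = new DeoptRecordSketch{from_code, type, top};
      }

      void Pop(bool* from_code, DeoptMethodTypeSketch* type) {
        DeoptRecordSketch* record = top;  // Caller must ensure top != nullptr.
        top = record->link;
        *from_code = record->from_code;
        *type = record->method_type;
        delete record;
      }
    };
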
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 13a96c7..f5cbc2a 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -39,6 +39,7 @@
ensureJitCompiled(Main.class, "constantLock");
ensureJitCompiled(Main.class, "notExcessiveNesting");
ensureJitCompiled(Main.class, "notNested");
+ ensureJitCompiled(TwoPath.class, "twoPath");
Main m = new Main();
diff --git a/test/1930-monitor-info/expected.txt b/test/1930-monitor-info/expected.txt
new file mode 100644
index 0000000..b43f1b2
--- /dev/null
+++ b/test/1930-monitor-info/expected.txt
@@ -0,0 +1,31 @@
+Running with single thread.
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testSingleThread], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testSingleThread], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Running with single thread in native.
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testSingleThread], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testSingleThread], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Lock twice
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwice], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwice], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwice], owner: main, entryCount: 2, waiters: [], notify_waiters: [] }
+Lock twice native
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNative], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNative], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNative], owner: main, entryCount: 2, waiters: [], notify_waiters: [] }
+Lock twice Java then native
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceJN], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceJN], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceJN], owner: main, entryCount: 2, waiters: [], notify_waiters: [] }
+Lock twice native then Java
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNJ], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Pre-lock[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNJ], owner: main, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockedTwiceNJ], owner: main, entryCount: 2, waiters: [], notify_waiters: [] }
+lock with wait
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockWait], owner: main, entryCount: 1, waiters: [Test1930 Thread - testLockWait], notify_waiters: [] }
+Thread[Test1930 Thread - testLockWait]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockWait], owner: Test1930 Thread - testLockWait, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testLockWait], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
+Wait for notify.
+Thread[Test1930 Thread - testLockWait]: MonitorUsage{ monitor: NamedLock[Test1930 - testNotifyWait], owner: Test1930 Thread - testLockWait, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testNotifyWait], owner: main, entryCount: 1, waiters: [Test1930 Thread - testLockWait], notify_waiters: [Test1930 Thread - testLockWait] }
+Thread[Test1930 Thread - testLockWait]: MonitorUsage{ monitor: NamedLock[Test1930 - testNotifyWait], owner: Test1930 Thread - testLockWait, entryCount: 1, waiters: [], notify_waiters: [] }
+Thread[main]: MonitorUsage{ monitor: NamedLock[Test1930 - testNotifyWait], owner: <NULL>, entryCount: 0, waiters: [], notify_waiters: [] }
diff --git a/test/1930-monitor-info/info.txt b/test/1930-monitor-info/info.txt
new file mode 100644
index 0000000..8e19edc
--- /dev/null
+++ b/test/1930-monitor-info/info.txt
@@ -0,0 +1,3 @@
+Tests basic functions in the jvmti plugin.
+
+Tests that the GetObjectMonitorUsage function works correctly.
diff --git a/test/1930-monitor-info/monitor.cc b/test/1930-monitor-info/monitor.cc
new file mode 100644
index 0000000..7f97c05
--- /dev/null
+++ b/test/1930-monitor-info/monitor.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pthread.h>
+
+#include <cstdio>
+#include <iostream>
+#include <vector>
+
+#include "android-base/logging.h"
+#include "jni.h"
+#include "jvmti.h"
+
+#include "scoped_local_ref.h"
+#include "scoped_primitive_array.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test1930MonitorInfo {
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test1930_executeLockedNative(JNIEnv* env,
+ jclass klass,
+ jobject run,
+ jobject l) {
+ ScopedLocalRef<jclass> runnable(env, env->FindClass("java/lang/Runnable"));
+ if (env->ExceptionCheck()) {
+ return;
+ }
+ jmethodID method = env->GetMethodID(runnable.get(), "run", "()V");
+
+ if (env->ExceptionCheck()) {
+ return;
+ }
+ jmethodID printMethod = env->GetStaticMethodID(klass, "printPreLock", "(Ljava/lang/Object;)V");
+ if (env->ExceptionCheck()) {
+ return;
+ }
+
+ env->CallStaticVoidMethod(klass, printMethod, l);
+ if (env->ExceptionCheck()) {
+ return;
+ }
+ if (env->MonitorEnter(l) != 0) {
+ return;
+ }
+ env->CallVoidMethod(run, method);
+ env->MonitorExit(l);
+}
+
+} // namespace Test1930MonitorInfo
+} // namespace art
diff --git a/test/1930-monitor-info/run b/test/1930-monitor-info/run
new file mode 100755
index 0000000..e92b873
--- /dev/null
+++ b/test/1930-monitor-info/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/1930-monitor-info/src/Main.java b/test/1930-monitor-info/src/Main.java
new file mode 100644
index 0000000..3328461
--- /dev/null
+++ b/test/1930-monitor-info/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+ public static void main(String[] args) throws Exception {
+ art.Test1930.run();
+ }
+}
diff --git a/test/1930-monitor-info/src/art/Monitors.java b/test/1930-monitor-info/src/art/Monitors.java
new file mode 100644
index 0000000..26f7718
--- /dev/null
+++ b/test/1930-monitor-info/src/art/Monitors.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+public class Monitors {
+ public static class NamedLock {
+ public final String name;
+ public NamedLock(String name) {
+ this.name = name;
+ }
+ public String toString() {
+ return String.format("NamedLock[%s]", name);
+ }
+ }
+
+ public static final class MonitorUsage {
+ public final Object monitor;
+ public final Thread owner;
+ public final int entryCount;
+ public final Thread[] waiters;
+ public final Thread[] notifyWaiters;
+
+ public MonitorUsage(
+ Object monitor,
+ Thread owner,
+ int entryCount,
+ Thread[] waiters,
+ Thread[] notifyWaiters) {
+ this.monitor = monitor;
+ this.entryCount = entryCount;
+ this.owner = owner;
+ this.waiters = waiters;
+ this.notifyWaiters = notifyWaiters;
+ }
+
+ private static String toNameList(Thread[] ts) {
+ return Arrays.toString(Arrays.stream(ts).map((Thread t) -> t.getName()).toArray());
+ }
+
+ public String toString() {
+ return String.format(
+ "MonitorUsage{ monitor: %s, owner: %s, entryCount: %d, waiters: %s, notify_waiters: %s }",
+ monitor,
+ (owner != null) ? owner.getName() : "<NULL>",
+ entryCount,
+ toNameList(waiters),
+ toNameList(notifyWaiters));
+ }
+ }
+
+ public static native MonitorUsage getObjectMonitorUsage(Object monitor);
+}
+
diff --git a/test/1930-monitor-info/src/art/Test1930.java b/test/1930-monitor-info/src/art/Test1930.java
new file mode 100644
index 0000000..a7fa1c7
--- /dev/null
+++ b/test/1930-monitor-info/src/art/Test1930.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.concurrent.Semaphore;
+import java.util.Arrays;
+
+public class Test1930 {
+ public static final int NUM_RETRY = 100;
+ private static void testSingleThread() {
+ Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testSingleThread");
+ executeLocked(() -> { printMonitorUsage(lk); }, lk);
+ }
+ private static void testSingleThreadNative() {
+ Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testSingleThread");
+ executeLockedNative(() -> { printMonitorUsage(lk); }, lk);
+ }
+
+ private static void testLockedTwice() {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testLockedTwice");
+ executeLocked(() -> { executeLocked(() -> { printMonitorUsage(lk); }, lk); }, lk);
+ }
+
+ private static void testLockedTwiceNJ() {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testLockedTwiceNJ");
+ executeLockedNative(() -> { executeLocked(() -> { printMonitorUsage(lk); }, lk); }, lk);
+ }
+
+ private static void testLockedTwiceJN() {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testLockedTwiceJN");
+ executeLocked(() -> { executeLockedNative(() -> { printMonitorUsage(lk); }, lk); }, lk);
+ }
+
+ private static void testLockedTwiceNative() {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testLockedTwiceNative");
+ executeLockedNative(() -> { executeLockedNative(() -> { printMonitorUsage(lk); }, lk); }, lk);
+ }
+
+ public final static class ThreadSignaler {
+ public volatile boolean signal = false;
+ }
+
+ private static void testLockWait() throws Exception {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testLockWait");
+ final Semaphore sem = new Semaphore(0);
+ final Thread t = new Thread(() -> {
+ sem.release();
+ synchronized (lk) {
+ printMonitorUsage(lk);
+ }
+ }, "Test1930 Thread - testLockWait");
+ synchronized (lk) {
+ t.start();
+ // Wait for the other thread to actually start.
+ sem.acquire();
+ // Wait for the other thread to go to sleep trying to get the mutex. This might take a (short)
+ // time since we try spinning first for better performance.
+ boolean found_wait = false;
+ for (long i = 0; i < NUM_RETRY; i++) {
+ if (Arrays.asList(Monitors.getObjectMonitorUsage(lk).waiters).contains(t)) {
+ found_wait = true;
+ break;
+ } else {
+ Thread.sleep(500);
+ Thread.yield();
+ }
+ }
+ if (!found_wait) {
+ System.out.println("other thread doesn't seem to be waiting.");
+ }
+ printMonitorUsage(lk);
+ }
+ t.join();
+ printMonitorUsage(lk);
+ }
+
+ private static void testNotifyWait() throws Exception {
+ final Monitors.NamedLock lk = new Monitors.NamedLock("Test1930 - testNotifyWait");
+ final Semaphore sem = new Semaphore(0);
+ Thread t = new Thread(() -> {
+ synchronized (lk) {
+ printMonitorUsage(lk);
+ sem.release();
+ try {
+ lk.wait();
+ } catch (Exception e) {
+ throw new Error("Error waiting!", e);
+ }
+ printMonitorUsage(lk);
+ }
+ }, "Test1930 Thread - testLockWait");
+ t.start();
+ sem.acquire();
+ synchronized (lk) {
+ printMonitorUsage(lk);
+ lk.notifyAll();
+ }
+ t.join();
+ printMonitorUsage(lk);
+ }
+
+ public static void run() throws Exception {
+ // Single threaded tests.
+ System.out.println("Running with single thread.");
+ testSingleThread();
+ System.out.println("Running with single thread in native.");
+ testSingleThreadNative();
+ System.out.println("Lock twice");
+ testLockedTwice();
+ System.out.println("Lock twice native");
+ testLockedTwiceNative();
+ System.out.println("Lock twice Java then native");
+ testLockedTwiceJN();
+ System.out.println("Lock twice native then Java");
+ testLockedTwiceNJ();
+
+ // Multi-threaded tests.
+ System.out.println("lock with wait");
+ testLockWait();
+ System.out.println("Wait for notify.");
+ testNotifyWait();
+ }
+
+ public static void printPreLock(Object lock) {
+ System.out.println(String.format("Pre-lock[%s]: %s",
+ Thread.currentThread().getName(), Monitors.getObjectMonitorUsage(lock)));
+ }
+
+ public static void executeLocked(Runnable r, Object lock) {
+ printPreLock(lock);
+ synchronized (lock) {
+ r.run();
+ }
+ }
+
+ public native static void executeLockedNative(Runnable r, Object m);
+ public static void printMonitorUsage(Object m) {
+ System.out.println(String.format("Thread[%s]: %s",
+ Thread.currentThread().getName(), Monitors.getObjectMonitorUsage(m)));
+ }
+}
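
The testLockWait helper above uses a bounded poll: retry up to NUM_RETRY
times, sleeping between attempts, and fall through with a diagnostic if the
condition never holds. The same pattern as a generic C++ sketch (not part of
the test itself):

    #include <chrono>
    #include <functional>
    #include <thread>

    // Poll `condition` up to `max_retries` times, sleeping between attempts.
    // Returns whether the condition was ever observed to hold.
    bool WaitForCondition(const std::function<bool()>& condition,
                          int max_retries,
                          std::chrono::milliseconds delay) {
      for (int i = 0; i < max_retries; ++i) {
        if (condition()) {
          return true;
        }
        std::this_thread::sleep_for(delay);
        std::this_thread::yield();
      }
      return false;
    }
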
diff --git a/test/476-checker-ctor-fence-redun-elim/expected.txt b/test/476-checker-ctor-fence-redun-elim/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/476-checker-ctor-fence-redun-elim/expected.txt
diff --git a/test/476-checker-ctor-fence-redun-elim/info.txt b/test/476-checker-ctor-fence-redun-elim/info.txt
new file mode 100644
index 0000000..46d62f7
--- /dev/null
+++ b/test/476-checker-ctor-fence-redun-elim/info.txt
@@ -0,0 +1,2 @@
+Tests to ensure constructor fences (after new-instance, new-array, or final fields) are properly
+merged together by the compiler when they are redundant.
diff --git a/test/476-checker-ctor-fence-redun-elim/src/Main.java b/test/476-checker-ctor-fence-redun-elim/src/Main.java
new file mode 100644
index 0000000..05f2f7c
--- /dev/null
+++ b/test/476-checker-ctor-fence-redun-elim/src/Main.java
@@ -0,0 +1,844 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+// Baseline class. This has no final fields, so there are no additional freezes
+// in its constructor.
+//
+// The new-instance itself always has 1 freeze for the happens-before on the object header
+// write (i.e. [obj.class = X] happens-before any access to obj).
+//
+// Total freezes for "new Base()": 1.
+class Base {
+ int w0;
+ int w1;
+ int w2;
+ int w3;
+
+ @Override
+ public String toString() {
+ return getClass().getName() + "(" + baseString() + ")";
+ }
+
+ protected String baseString() {
+ return String.format("w0: %d, w1: %d, w2: %d, w3: %d", w0, w1, w2, w3);
+ }
+}
+
+// This has a final field in its constructor, so there must be a field freeze
+// at the end of <init>.
+//
+// Total freezes for "new OneFinal()": 2.
+class OneFinal extends Base {
+ final int x;
+ OneFinal(int x) {
+ this.x = x;
+ }
+
+ @Override
+ protected String baseString() {
+ return String.format("%s, x: %d", super.baseString(), x);
+ }
+}
+
+class Assert {
+ public static void stringEquals(String expected, Object actual) {
+ stringEquals$noinline$(expected, actual);
+ }
+
+ // Forbid compiler from inlining this to avoid overly clever optimizations.
+ private static void stringEquals$noinline$(String expected, Object actual) {
+ String actualStr = Main.valueToString(actual);
+ if (!expected.equals(actualStr)) {
+ throw new AssertionError("Expected: " + expected + ", actual: " + actualStr);
+ }
+ }
+}
+
+interface Test {
+ public void exercise();
+ public void check();
+}
+
+class TestOneFinal implements Test {
+ // Initialize at least once before actual test.
+ public static Object external;
+
+ /// CHECK-START: void TestOneFinal.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+
+ /// CHECK-START: void TestOneFinal.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ @Override
+ public void exercise() {
+ Base b = new OneFinal(1);
+ // 1 store, 2 freezes.
+
+ // Stores to 'b' do not escape b.
+ b.w0 = 1;
+ b.w1 = 2;
+ b.w2 = 3;
+
+ // Publish the result to a global so that it is not LSE-eliminated.
+ external = b;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("OneFinal(w0: 1, w1: 2, w2: 3, w3: 0, x: 1)", external);
+ }
+}
+
+// This has a final field in its constructor, so there must be a field freeze
+// at the end of <init>. The previous base class's freezes accumulate on top
+// of this one.
+//
+// Total freezes for "new TwoFinal()": 3.
+class TwoFinal extends OneFinal {
+ final int y;
+ TwoFinal(int x, int y) {
+ super(x);
+ this.y = y;
+ }
+
+ @Override
+ protected String baseString() {
+ return String.format("%s, y: %d", super.baseString(), y);
+ }
+}
+
+// This has a final field in its constructor, so there must be a field freeze
+// at the end of <init>. The previous base class's freezes accumulate on top
+// of this one.
+//
+// Total freezes for "new ThreeFinal()": 4.
+class ThreeFinal extends TwoFinal {
+ final int z;
+ ThreeFinal(int x, int y, int z) {
+ super(x, y);
+ this.z = z;
+ }
+
+ @Override
+ protected String baseString() {
+ return String.format("%s, z: %d", super.baseString(), z);
+ }
+}
+
+class TestThreeFinal implements Test {
+ // Initialize at least once before actual test.
+ public static Object external;
+
+ /// CHECK-START: void TestThreeFinal.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+
+ /// CHECK-START: void TestThreeFinal.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ @Override
+ public void exercise() {
+ Base b = new ThreeFinal(1, 1, 2);
+ // 3 stores, 4 freezes.
+
+ // Stores to 'b' do not escape b.
+ b.w0 = 3;
+
+ // Publish the result to a global so that it is not LSE-eliminated.
+ external = b;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("ThreeFinal(w0: 3, w1: 0, w2: 0, w3: 0, x: 1, y: 1, z: 2)", external);
+ }
+}
+
+// Ensure "freezes" between multiple new-instances are optimized out.
+class TestMultiAlloc implements Test {
+ public static Object external;
+ public static Object external2;
+
+ /// CHECK-START: void TestMultiAlloc.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+
+ /// CHECK-START: void TestMultiAlloc.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>,<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+ @Override
+ public void exercise() {
+ // 1 freeze
+ Base b = new Base();
+ // 1 freeze
+ Base b2 = new Base();
+
+ // Merge 2 freezes above into 1 constructor fence.
+ external = b;
+ external2 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external2);
+ }
+}
+
+// Ensure "freezes" between multiple new-instances are optimized out.
+class TestThreeFinalTwice implements Test {
+ // Initialize at least once before actual test.
+ public static Object external;
+ public static Object external2;
+
+ /// CHECK-START: void TestThreeFinalTwice.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance>>]
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+
+ /// CHECK-START: void TestThreeFinalTwice.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>,<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+ @Override
+ public void exercise() {
+ Base b = new ThreeFinal(1, 1, 2);
+ // 3 stores, 4 freezes.
+
+ // Stores to 'b' do not escape b.
+ b.w0 = 3;
+
+ Base b2 = new ThreeFinal(4, 5, 6);
+ // 3 stores, 4 freezes.
+
+ // Stores to 'b2' do not escape b2.
+ b2.w0 = 7;
+
+ // Publish the result to a global so that it is not LSE-eliminated.
+ // Publishing is done at the end to give freezes above a chance to merge.
+ external = b;
+ external2 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("ThreeFinal(w0: 3, w1: 0, w2: 0, w3: 0, x: 1, y: 1, z: 2)", external);
+ Assert.stringEquals("ThreeFinal(w0: 7, w1: 0, w2: 0, w3: 0, x: 4, y: 5, z: 6)", external2);
+ }
+}
+
+class TestNonEscaping {
+ // Prevent constant folding.
+ static boolean test;
+
+ static Object external;
+ static Object external2;
+ static Object external3;
+ static Object external4;
+
+ static class Invoke implements Test {
+ /// CHECK-START: void TestNonEscaping$Invoke.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK: InvokeStaticOrDirect
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestNonEscaping$Invoke.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: InvokeStaticOrDirect
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>,<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // b cannot possibly escape into this invoke because it hasn't escaped onto the heap earlier,
+ // and the invoke doesn't take it as a parameter.
+ noEscape$noinline$();
+
+ // Remove the Constructor Fence for b, merging into the fence for b2.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external = b;
+ external2 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external2);
+ }
+ }
+
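+ // Pre-initialized store targets used by Store.exercise() below.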
+ public static int[] array = new int[1];
+ static Base base = new Base();
+
+ static class Store implements Test {
+ /// CHECK-START: void TestNonEscaping$Store.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ArraySet
+ /// CHECK-DAG: StaticFieldSet
+ /// CHECK-DAG: InstanceFieldSet
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestNonEscaping$Store.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>,<<NewInstance>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // Stores of inputs other than the fence target do not publish 'b'.
+ array[0] = b.w0; // aput
+ external = array; // sput
+ base.w0 = b.w0; // iput
+
+ // Remove the Constructor Fence for b, merging into the fence for b2.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("[0]", array);
+ Assert.stringEquals("[0]", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", base);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
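+ // Intentionally empty; the $noinline$ suffix keeps this call from being
+ // inlined, so the invoke survives into the optimizing passes.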
+ private static void noEscape$noinline$() {
+ }
+}
+
+class TestDontOptimizeAcrossBlocks implements Test {
+ // Prevent constant folding.
+ static boolean test;
+
+ static Object external;
+ static Object external3;
+
+ /// CHECK-START: void TestDontOptimizeAcrossBlocks.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+
+ /// CHECK-START: void TestDontOptimizeAcrossBlocks.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ /// CHECK-DAG: StaticFieldSet [<<External:l\d+>>,<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet [<<External2:l\d+>>,<<NewInstance2>>]
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // Do not move the constructor fence across this block, even though 'b' is
+ // not published yet: the pass only merges fences within a single basic block.
+ if (test) {
+ external = null;
+ }
+
+ Base b2 = new Base();
+ external = b2;
+ external3 = b;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("false", test);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ }
+}
+
+class TestDontOptimizeAcrossEscape {
+ // Prevent constant folding.
+ static boolean test;
+
+ static Object external;
+ static Object external2;
+ static Object external3;
+ static Object external4;
+
+ static class Invoke implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Invoke.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK: InvokeStaticOrDirect
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Invoke.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK: InvokeStaticOrDirect
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+ // Do not optimize across invokes into which the fence target escapes.
+ invoke$noinline$(b);
+
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external = b;
+ external2 = b2;
+ }
+
+ private static void invoke$noinline$(Object b) {
+ // Even though 'b' does not escape this method, we conservatively assume all parameters
+ // of an invoke escape.
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external2);
+ }
+ }
+
+ public static Object[] array = new Object[3];
+ static Base base = new Base();
+
+ static class InstanceEscaper {
+ public Object holder;
+
+ @Override
+ public String toString() {
+ return getClass().getName() + "(" + baseString() + ")";
+ }
+
+ protected String baseString() {
+ return String.format("holder: %s", Main.valueToString(holder));
+ }
+ }
+
+ static InstanceEscaper instanceEscaper = new InstanceEscaper();
+
+ static class StoreIput implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreIput.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: InstanceFieldSet
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreIput.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // A store of 'b' into another instance will publish 'b'.
+ instanceEscaper.holder = b;
+
+ // Do not remove any constructor fences above.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals(
+ "TestDontOptimizeAcrossEscape$InstanceEscaper(holder: Base(w0: 0, w1: 0, w2: 0, w3: 0))",
+ instanceEscaper);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
+ static class StoreAput implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreAput.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: ArraySet
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreAput.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // A store of 'b' into another array will publish 'b'.
+ array[0] = b; // aput
+
+ // Do not remove any constructor fences above.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("[Base(w0: 0, w1: 0, w2: 0, w3: 0),<null>,<null>]", array);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
+ static class StoreSput implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreSput.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: StaticFieldSet
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$StoreSput.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // A store of 'b' into a static will publish 'b'.
+ external = b;
+
+ // Do not remove any constructor fences above.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
+ static class Deopt implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Deopt.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: Deoptimize
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Deopt.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ // These array accesses generate a Deoptimize guard instead of explicit bounds checks.
+ array[0] = external; // aput
+ array[1] = external; // aput
+ array[2] = external; // aput
+
+ // Do not remove any constructor fences above.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("[Base(w0: 0, w1: 0, w2: 0, w3: 0),"
+ + "Base(w0: 0, w1: 0, w2: 0, w3: 0),"
+ + "Base(w0: 0, w1: 0, w2: 0, w3: 0)]",
+ array);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
+ static class Select implements Test {
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Select.exercise() constructor_fence_redundancy_elimination (before)
+ /// CHECK: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: Select
+ /// CHECK: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$Select.exercise() constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: <<NewInstance2:l\d+>> NewInstance
+ /// CHECK-DAG: ConstructorFence [<<NewInstance2>>]
+ /// CHECK-NOT: ConstructorFence
+ @Override
+ public void exercise() {
+ Base b = new Base();
+
+ boolean localTest = test;
+ Object localExternal = external3;
+
+ // Selecting 'b' creates an alias, which we conservatively assume escapes immediately.
+ external = localTest ? b : localExternal;
+
+ // Do not remove any constructor fences above.
+ Base b2 = new Base();
+
+ // Do not LSE-eliminate b, b2.
+ external3 = b;
+ external4 = b2;
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external3);
+ Assert.stringEquals("Base(w0: 0, w1: 0, w2: 0, w3: 0)", external4);
+ }
+ }
+
+ static class MakeBoundTypeTest implements Test {
+ public static Object makeBoundType;
+ public static Object makeBoundTypeSub;
+
+ @Override
+ public void exercise() {
+ // Note: MakeBoundType is special; construct it via reflection so that
+ // the constructor is not inlined.
+ try {
+ makeBoundType = exerciseNewInstance(MakeBoundType.class, 123);
+ makeBoundTypeSub = exerciseNewInstance(MakeBoundTypeSub.class, 123);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void check() {
+ Assert.stringEquals(
+ "TestDontOptimizeAcrossEscape$MakeBoundTypeTest$MakeBoundType(abcdefgh: 123, x: 2)",
+ makeBoundType);
+ Assert.stringEquals(
+ "TestDontOptimizeAcrossEscape$MakeBoundTypeTest$MakeBoundTypeSub(abcdefgh: 123, x: 1)",
+ makeBoundTypeSub);
+ }
+
+ // Make a new instance of 'klass'.
+ private static <T> T exerciseNewInstance(Class<T> klass, int params) throws Exception {
+ return klass.cast(klass.getDeclaredConstructor(int.class).newInstance(params));
+ }
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$MakeBoundTypeTest$MakeBoundType.<init>(int) constructor_fence_redundancy_elimination (before)
+ /// CHECK-DAG: <<This:l\d+>> ParameterValue
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: BoundType
+ /// CHECK-DAG: ConstructorFence [<<This>>]
+ /// CHECK-NOT: ConstructorFence
+
+ /// CHECK-START: void TestDontOptimizeAcrossEscape$MakeBoundTypeTest$MakeBoundType.<init>(int) constructor_fence_redundancy_elimination (after)
+ /// CHECK-DAG: <<This:l\d+>> ParameterValue
+ /// CHECK-DAG: <<NewInstance:l\d+>> NewInstance
+ /// CHECK: ConstructorFence [<<NewInstance>>]
+ /// CHECK-DAG: BoundType
+ /// CHECK-DAG: ConstructorFence [<<This>>]
+ /// CHECK-NOT: ConstructorFence
+ static class MakeBoundType {
+ final int abcdefgh;
+ int x;
+
+ MakeBoundType(int param) {
+ abcdefgh = param;
+
+ Base b = new Base();
+ // constructor-fence(b)
+
+ if (this instanceof MakeBoundTypeSub) {
+ // Create a "BoundType(this)" which prevents
+ // a merged constructor-fence(this, b)
+ x = 1;
+ } else {
+ x = 2;
+ }
+
+ // publish(b).
+ external = b;
+
+ // constructor-fence(this)
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getName() + "(" + baseString() + ")";
+ }
+
+ protected String baseString() {
+ return String.format("abcdefgh: %d, x: %d", abcdefgh, x);
+ }
+ }
+
+ static class MakeBoundTypeSub extends MakeBoundType {
+ MakeBoundTypeSub(int xyz) {
+ super(xyz);
+ }
+ }
+ }
+}
+
+public class Main {
+ public static void main(String[] args) throws Exception {
+ // Ensure that all of this code does not get optimized out into a no-op
+ // by actually running the code with reflection, then validating
+ // the result by asserting it against a string.
+ Class<? extends Test>[] testClasses = new Class[] {
+ TestOneFinal.class,
+ TestThreeFinal.class,
+ TestMultiAlloc.class,
+ TestThreeFinalTwice.class,
+ TestNonEscaping.Invoke.class,
+ TestNonEscaping.Store.class,
+ TestDontOptimizeAcrossBlocks.class,
+ TestDontOptimizeAcrossEscape.Invoke.class,
+ TestDontOptimizeAcrossEscape.StoreIput.class,
+ TestDontOptimizeAcrossEscape.StoreAput.class,
+ TestDontOptimizeAcrossEscape.StoreSput.class,
+ TestDontOptimizeAcrossEscape.Deopt.class,
+ TestDontOptimizeAcrossEscape.Select.class,
+ TestDontOptimizeAcrossEscape.MakeBoundTypeTest.class,
+ };
+
+ for (Class<? extends Test> klass : testClasses) {
+ exerciseTestClass(klass);
+ }
+ }
+
+ /**
+ * Invoke Test#exercise(), then Test#check().
+ * @throws AssertionError if test fails.
+ */
+ private static void exerciseTestClass(Class<? extends Test> klass) throws Exception {
+ Test instance = klass.cast(klass.getDeclaredConstructor().newInstance());
+
+ // Use reflection as a best-effort to avoid compiler optimizations (e.g. inlining).
+ instance.getClass().getDeclaredMethod("exercise").invoke(instance);
+ instance.getClass().getDeclaredMethod("check").invoke(instance);
+ }
+
+ // Print an object, with special handling for arrays (recursively) and null.
+ public static String valueToString(Object val) {
+ if (val == null) {
+ return "<null>";
+ }
+ if (val.getClass().isArray()) {
+ String fmt = "[";
+ int length = Array.getLength(val);
+ for (int i = 0; i < length; ++i) {
+ Object arrayElement = Array.get(val, i);
+ fmt += valueToString(arrayElement);
+
+ if (i != length - 1) {
+ fmt += ",";
+ }
+ }
+ fmt += "]";
+
+ return fmt;
+ }
+
+ return val.toString();
+ }
+}
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 6632503..7ae873a 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -881,10 +881,10 @@
/// CHECK: ArrayGet
private static int testAllocationEliminationOfArray2() {
// Cannot eliminate array allocation since array is accessed with non-constant
- // index.
- int[] array = new int[4];
- array[2] = 4;
- array[3] = 7;
+ // index (only 3 elements to prevent vectorization of the reduction).
+ int[] array = new int[3];
+ array[1] = 4;
+ array[2] = 7;
int sum = 0;
for (int e : array) {
sum += e;
diff --git a/test/597-deopt-busy-loop/expected.txt b/test/597-deopt-busy-loop/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/597-deopt-busy-loop/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/597-deopt-busy-loop/info.txt b/test/597-deopt-busy-loop/info.txt
new file mode 100644
index 0000000..2c50dbb
--- /dev/null
+++ b/test/597-deopt-busy-loop/info.txt
@@ -0,0 +1 @@
+Test deoptimizing when returning from the suspend-check runtime method.
diff --git a/test/597-deopt-busy-loop/run b/test/597-deopt-busy-loop/run
new file mode 100644
index 0000000..bc04498
--- /dev/null
+++ b/test/597-deopt-busy-loop/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run JIT-compiled and in debuggable mode.
+exec ${RUN} --jit -Xcompiler-option --debuggable "${@}"
diff --git a/test/597-deopt-busy-loop/src/Main.java b/test/597-deopt-busy-loop/src/Main.java
new file mode 100644
index 0000000..46b6bbf
--- /dev/null
+++ b/test/597-deopt-busy-loop/src/Main.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main implements Runnable {
+ static final int numberOfThreads = 2;
+ volatile static boolean sExitFlag = false;
+ volatile static boolean sEntered = false;
+ int threadIndex;
+
+ private static native void deoptimizeAll();
+ private static native void assertIsInterpreted();
+ private static native void assertIsManaged();
+ private static native void ensureJitCompiled(Class<?> cls, String methodName);
+
+ Main(int index) {
+ threadIndex = index;
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.loadLibrary(args[0]);
+
+ final Thread[] threads = new Thread[numberOfThreads];
+ for (int t = 0; t < threads.length; t++) {
+ threads[t] = new Thread(new Main(t));
+ threads[t].start();
+ }
+ for (Thread t : threads) {
+ t.join();
+ }
+ System.out.println("Finishing");
+ }
+
+ public void $noinline$busyLoop() {
+ assertIsManaged();
+ sEntered = true;
+ for (;;) {
+ if (sExitFlag) {
+ break;
+ }
+ }
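+ // deoptimizeAll() runs while we spin; this frame is deoptimized when the
+ // loop's suspend check returns, so we must be interpreted by this point.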
+ assertIsInterpreted();
+ }
+
+ public void run() {
+ if (threadIndex == 0) {
+ while (!sEntered) {
+ Thread.yield();
+ }
+ deoptimizeAll();
+ sExitFlag = true;
+ } else {
+ ensureJitCompiled(Main.class, "$noinline$busyLoop");
+ $noinline$busyLoop();
+ }
+ }
+}
diff --git a/test/597-deopt-invoke-stub/expected.txt b/test/597-deopt-invoke-stub/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/597-deopt-invoke-stub/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/597-deopt-invoke-stub/info.txt b/test/597-deopt-invoke-stub/info.txt
new file mode 100644
index 0000000..31960a9
--- /dev/null
+++ b/test/597-deopt-invoke-stub/info.txt
@@ -0,0 +1 @@
+Test deoptimizing when returning from a quick-to-interpreter bridge.
diff --git a/test/597-deopt-invoke-stub/run b/test/597-deopt-invoke-stub/run
new file mode 100644
index 0000000..bc04498
--- /dev/null
+++ b/test/597-deopt-invoke-stub/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run JIT-compiled and in debuggable mode.
+exec ${RUN} --jit -Xcompiler-option --debuggable "${@}"
diff --git a/test/597-deopt-invoke-stub/src/Main.java b/test/597-deopt-invoke-stub/src/Main.java
new file mode 100644
index 0000000..0751783
--- /dev/null
+++ b/test/597-deopt-invoke-stub/src/Main.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main implements Runnable {
+ static final int numberOfThreads = 2;
+ volatile static boolean sExitFlag = false;
+ volatile static boolean sEntered = false;
+ int threadIndex;
+
+ private static native void deoptimizeAll();
+ private static native void assertIsInterpreted();
+ private static native void assertIsManaged();
+ private static native void ensureJitCompiled(Class<?> cls, String methodName);
+
+ Main(int index) {
+ threadIndex = index;
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.loadLibrary(args[0]);
+
+ final Thread[] threads = new Thread[numberOfThreads];
+ for (int t = 0; t < threads.length; t++) {
+ threads[t] = new Thread(new Main(t));
+ threads[t].start();
+ }
+ for (Thread t : threads) {
+ t.join();
+ }
+ System.out.println("Finishing");
+ }
+
+ private static int $noinline$bar() {
+ // Should be entered via the quick-to-interpreter bridge.
+ assertIsInterpreted();
+ sEntered = true;
+ while (!sExitFlag) {}
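+ // deoptimizeAll() runs while we spin; the JIT-compiled caller
+ // $noinline$foo() is deoptimized when this bridge returns.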
+ assertIsInterpreted();
+ return 0x1234;
+ }
+
+ public void $noinline$foo() {
+ assertIsManaged();
+ if ($noinline$bar() != 0x1234) {
+ System.out.println("Bad return value");
+ }
+ assertIsInterpreted();
+ }
+
+ public void run() {
+ if (threadIndex == 0) {
+ while (!sEntered) {
+ Thread.yield();
+ }
+ deoptimizeAll();
+ sExitFlag = true;
+ } else {
+ ensureJitCompiled(Main.class, "$noinline$foo");
+ $noinline$foo();
+ }
+ }
+}
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 056ed91..9229d81 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -473,6 +473,18 @@
return y;
}
+ // b/65478356: sum up 2-dim array.
+ static int sum(int[][] a) {
+ int sum = 0;
+ for (int y = 0; y < a.length; y++) {
+ int[] aa = a[y];
+ for (int x = 0; x < aa.length; x++) {
+ sum += aa[x];
+ }
+ }
+ return sum;
+ }
+
public static void main(String[] args) {
expectEquals(10, earlyExitFirst(-1));
for (int i = 0; i <= 10; i++) {
@@ -613,6 +625,14 @@
}
expectEquals(2, verify);
+ int[][] x = new int[128][128];
+ for (int i = 0; i < 128; i++) {
+ for (int j = 0; j < 128; j++) {
+ x[i][j] = -i - j;
+ }
+ }
+ expectEquals(-2080768, sum(x));
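+ // Expected value: the sum of -(i + j) over a 128x128 array is
+ // -2 * 128 * (0 + ... + 127) = -2 * 128 * 8128 = -2080768.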
+
System.out.println("passed");
}
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 741b5fa..8208a9e 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -51,6 +51,26 @@
return sum;
}
+ /// CHECK-START: int Main.reductionInt(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.reductionInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -59,6 +79,28 @@
return sum;
}
+ /// CHECK-START: long Main.reductionLong(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<Long0>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.reductionLong(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Long0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -67,6 +109,90 @@
return sum;
}
+ private static byte reductionByteM1(byte[] x) {
+ byte sum = -1;
+ for (int i = 0; i < x.length; i++) {
+ sum += x[i];
+ }
+ return sum;
+ }
+
+ private static short reductionShortM1(short[] x) {
+ short sum = -1;
+ for (int i = 0; i < x.length; i++) {
+ sum += x[i];
+ }
+ return sum;
+ }
+
+ private static char reductionCharM1(char[] x) {
+ char sum = 0xffff;
+ for (int i = 0; i < x.length; i++) {
+ sum += x[i];
+ }
+ return sum;
+ }
+
+ /// CHECK-START: int Main.reductionIntM1(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsM1:i\d+>> IntConstant -1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.reductionIntM1(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<ConsM1:i\d+>> IntConstant -1 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM1>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ private static int reductionIntM1(int[] x) {
+ int sum = -1;
+ for (int i = 0; i < x.length; i++) {
+ sum += x[i];
+ }
+ return sum;
+ }
+
+ /// CHECK-START: long Main.reductionLongM1(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<LongM1:j\d+>> LongConstant -1 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<LongM1>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.reductionLongM1(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<LongM1:j\d+>> LongConstant -1 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<LongM1>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
+ private static long reductionLongM1(long[] x) {
+ long sum = -1L;
+ for (int i = 0; i < x.length; i++) {
+ sum += x[i];
+ }
+ return sum;
+ }
+
private static byte reductionMinusByte(byte[] x) {
byte sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -91,6 +217,26 @@
return sum;
}
+ /// CHECK-START: int Main.reductionMinusInt(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Sub [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.reductionMinusInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinusInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -99,6 +245,28 @@
return sum;
}
+ /// CHECK-START: long Main.reductionMinusLong(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<Long0>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Sub [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.reductionMinusLong(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Long0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionMinusLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -131,6 +299,28 @@
return min;
}
+ /// CHECK-START: int Main.reductionMinInt(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsM:i\d+>> IntConstant 2147483647 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.reductionMinInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<ConsM:i\d+>> IntConstant 2147483647 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecMin [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinInt(int[] x) {
int min = Integer.MAX_VALUE;
for (int i = 0; i < x.length; i++) {
@@ -171,6 +361,28 @@
return max;
}
+ /// CHECK-START: int Main.reductionMaxInt(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsM:i\d+>> IntConstant -2147483648 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.reductionMaxInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<ConsM:i\d+>> IntConstant -2147483648 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecMax [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMaxInt(int[] x) {
int max = Integer.MIN_VALUE;
for (int i = 0; i < x.length; i++) {
@@ -253,6 +465,11 @@
expectEquals(38070, reductionChar(xc));
expectEquals(365750, reductionInt(xi));
expectEquals(365750L, reductionLong(xl));
+ expectEquals(-75, reductionByteM1(xb));
+ expectEquals(-27467, reductionShortM1(xs));
+ expectEquals(38069, reductionCharM1(xc));
+ expectEquals(365749, reductionIntM1(xi));
+ expectEquals(365749L, reductionLongM1(xl));
expectEquals(74, reductionMinusByte(xb));
expectEquals(27466, reductionMinusShort(xs));
expectEquals(27466, reductionMinusChar(xc));
diff --git a/test/665-checker-simd-zero/expected.txt b/test/665-checker-simd-zero/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/665-checker-simd-zero/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/665-checker-simd-zero/info.txt b/test/665-checker-simd-zero/info.txt
new file mode 100644
index 0000000..55eca88
--- /dev/null
+++ b/test/665-checker-simd-zero/info.txt
@@ -0,0 +1 @@
+Functional tests on zero-out SIMD vectorization.
diff --git a/test/665-checker-simd-zero/src/Main.java b/test/665-checker-simd-zero/src/Main.java
new file mode 100644
index 0000000..66eea64
--- /dev/null
+++ b/test/665-checker-simd-zero/src/Main.java
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for vectorization of zero-out loops.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.zeroz(boolean[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zeroz(boolean[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zeroz(boolean[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = false;
+ }
+ }
+
+ /// CHECK-START: void Main.zerob(byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zerob(byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zerob(byte[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zeroc(char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zeroc(char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zeroc(char[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zeros(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zeros(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zeros(short[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zeroi(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zeroi(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zeroi(int[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zerol(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zerol(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zerol(long[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zerof(float[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zerof(float[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zerof(float[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ /// CHECK-START: void Main.zerod(double[]) loop_optimization (before)
+ /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.zerod(double[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ private static void zerod(double[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = 0;
+ }
+ }
+
+ public static void main(String[] args) {
+ int total = 1111;
+
+ boolean[] xz = new boolean[total];
+ byte[] xb = new byte[total];
+ char[] xc = new char[total];
+ short[] xs = new short[total];
+ int[] xi = new int[total];
+ long[] xl = new long[total];
+ float[] xf = new float[total];
+ double[] xd = new double[total];
+
+ for (int i = 0; i < total; i++) {
+ xz[i] = true;
+ xb[i] = 1;
+ xc[i] = 1;
+ xs[i] = 1;
+ xi[i] = 1;
+ xl[i] = 1;
+ xf[i] = 1;
+ xd[i] = 1;
+ }
+
+ for (int i = 0; i < total; i++) {
+ expectEquals(true, xz[i]);
+ expectEquals(1, xb[i]);
+ expectEquals(1, xc[i]);
+ expectEquals(1, xs[i]);
+ expectEquals(1, xi[i]);
+ expectEquals(1, xl[i]);
+ expectEquals(1, xf[i]);
+ expectEquals(1, xd[i]);
+ }
+
+ zeroz(xz);
+ zerob(xb);
+ zeroc(xc);
+ zeros(xs);
+ zeroi(xi);
+ zerol(xl);
+ zerof(xf);
+ zerod(xd);
+
+ for (int i = 0; i < total; i++) {
+ expectEquals(false, xz[i]);
+ expectEquals(0, xb[i]);
+ expectEquals(0, xc[i]);
+ expectEquals(0, xs[i]);
+ expectEquals(0, xi[i]);
+ expectEquals(0, xl[i]);
+ expectEquals(0, xf[i]);
+ expectEquals(0, xd[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(boolean expected, boolean result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(float expected, float result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(double expected, double result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/Android.bp b/test/Android.bp
index 2a88af1..2f23056 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -250,6 +250,7 @@
"ti-agent/common_helper.cc",
"ti-agent/frame_pop_helper.cc",
"ti-agent/locals_helper.cc",
+ "ti-agent/monitors_helper.cc",
"ti-agent/redefinition_helper.cc",
"ti-agent/suspension_helper.cc",
"ti-agent/stack_trace_helper.cc",
@@ -299,7 +300,8 @@
"1922-owned-monitors-info/owned_monitors.cc",
"1924-frame-pop-toggle/frame_pop_toggle.cc",
"1926-missed-frame-pop/frame_pop_missed.cc",
- "1927-exception-event/exception_event.cc"
+ "1927-exception-event/exception_event.cc",
+ "1930-monitor-info/monitor.cc",
],
shared_libs: [
"libbase",
@@ -349,6 +351,7 @@
],
shared_libs: [
"libbase",
+ "slicer",
],
header_libs: ["libopenjdkjvmti_headers"],
}
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 90e2600..c16c487 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -422,10 +422,7 @@
if [[ "$JVMTI_REDEFINE_STRESS" = "y" ]]; then
# We really cannot do this on RI so don't bother passing it in that case.
if [[ "$USE_JVM" = "n" ]]; then
- file_1=$(mktemp --tmpdir=${DEX_LOCATION})
- file_2=$(mktemp --tmpdir=${DEX_LOCATION})
- # TODO Remove need for DEXTER_BINARY!
- agent_args="${agent_args},redefine,${DEXTER_BINARY},${file_1},${file_2}"
+ agent_args="${agent_args},redefine"
fi
fi
if [[ "$JVMTI_FIELD_STRESS" = "y" ]]; then
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 315476a..84758c9 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -216,6 +216,18 @@
"suppressed when tracing."]
},
{
+ "tests": "597-deopt-busy-loop",
+ "variant": "interp-ac | interpreter | trace | stream",
+ "description": ["This test expects JIT compilation, which is",
+ "suppressed when tracing."]
+ },
+ {
+ "tests": "597-deopt-invoke-stub",
+ "variant": "interp-ac | interpreter | optimizing | trace | stream",
+ "description": ["This test expects JIT compilation and no AOT for",
+ "testing deoptimizing at quick-to-interpreter bridge."]
+ },
+ {
"tests": "137-cfi",
"description": ["CFI unwinding expects managed frames, and the test",
"does not iterate enough to even compile. JIT also",
diff --git a/test/ti-agent/jvmti_helper.cc b/test/ti-agent/jvmti_helper.cc
index 7280102..c290e9b 100644
--- a/test/ti-agent/jvmti_helper.cc
+++ b/test/ti-agent/jvmti_helper.cc
@@ -50,7 +50,7 @@
.can_get_synthetic_attribute = 1,
.can_get_owned_monitor_info = 0,
.can_get_current_contended_monitor = 0,
- .can_get_monitor_info = 0,
+ .can_get_monitor_info = 1,
.can_pop_frame = 0,
.can_redefine_classes = 1,
.can_signal_thread = 0,
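
For context on the capability flipped above: this table is the test harness's baseline capability set. A standalone JVMTI agent would request the same capability at load time through the standard API; a minimal sketch follows (entry-point boilerplate only, names illustrative):

    #include <cstring>
    #include "jvmti.h"

    // Sketch: request can_get_monitor_info, which GetObjectMonitorUsage requires.
    extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm,
                                                   char* /*options*/,
                                                   void* /*reserved*/) {
      jvmtiEnv* jvmti = nullptr;
      if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2) != JNI_OK) {
        return JNI_ERR;
      }
      jvmtiCapabilities caps;
      std::memset(&caps, 0, sizeof(caps));
      caps.can_get_monitor_info = 1;
      return jvmti->AddCapabilities(&caps) == JVMTI_ERROR_NONE ? JNI_OK : JNI_ERR;
    }
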
diff --git a/test/ti-agent/monitors_helper.cc b/test/ti-agent/monitors_helper.cc
new file mode 100644
index 0000000..7c28ede
--- /dev/null
+++ b/test/ti-agent/monitors_helper.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "jni.h"
+#include "jvmti.h"
+#include <vector>
+#include "jvmti_helper.h"
+#include "jni_helper.h"
+#include "test_env.h"
+#include "scoped_local_ref.h"
+namespace art {
+namespace common_monitors {
+
+extern "C" JNIEXPORT jobject JNICALL Java_art_Monitors_getObjectMonitorUsage(
+ JNIEnv* env, jclass, jobject obj) {
+ ScopedLocalRef<jclass> klass(env, env->FindClass("art/Monitors$MonitorUsage"));
+ if (env->ExceptionCheck()) {
+ return nullptr;
+ }
+ jmethodID constructor = env->GetMethodID(
+ klass.get(),
+ "<init>",
+ "(Ljava/lang/Object;Ljava/lang/Thread;I[Ljava/lang/Thread;[Ljava/lang/Thread;)V");
+ if (env->ExceptionCheck()) {
+ return nullptr;
+ }
+ jvmtiMonitorUsage usage;
+ if (JvmtiErrorToException(env, jvmti_env, jvmti_env->GetObjectMonitorUsage(obj, &usage))) {
+ return nullptr;
+ }
+ jobjectArray wait = CreateObjectArray(env, usage.waiter_count, "java/lang/Thread",
+ [&](jint i) { return usage.waiters[i]; });
+ if (env->ExceptionCheck()) {
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.waiters));
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.notify_waiters));
+ return nullptr;
+ }
+ jobjectArray notify_wait = CreateObjectArray(env, usage.notify_waiter_count, "java/lang/Thread",
+ [&](jint i) { return usage.notify_waiters[i]; });
+ if (env->ExceptionCheck()) {
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.waiters));
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.notify_waiters));
+ return nullptr;
+ }
+ // usage.waiters and usage.notify_waiters were allocated by GetObjectMonitorUsage
+ // and have been copied into the Java arrays above, so release them here to
+ // avoid leaking them on the success path.
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.waiters));
+ jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(usage.notify_waiters));
+ return env->NewObject(klass.get(), constructor,
+ obj, usage.owner, usage.entry_count, wait, notify_wait);
+}
+
+} // namespace common_monitors
+} // namespace art
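
Every exit path in the helper above has to release the two JVMTI-allocated thread arrays by hand. A small RAII guard would make that automatic; this is a hypothetical refactoring sketch, not part of the patch:

    // Hypothetical helper: frees JVMTI-allocated memory when it leaves scope.
    class JvmtiDeallocator {
     public:
      JvmtiDeallocator(jvmtiEnv* env, void* ptr) : env_(env), ptr_(ptr) {}
      ~JvmtiDeallocator() {
        if (ptr_ != nullptr) {
          env_->Deallocate(reinterpret_cast<unsigned char*>(ptr_));
        }
      }
     private:
      jvmtiEnv* env_;
      void* ptr_;
    };

    // Usage inside Java_art_Monitors_getObjectMonitorUsage, right after the
    // GetObjectMonitorUsage call:
    //   JvmtiDeallocator waiters_guard(jvmti_env, usage.waiters);
    //   JvmtiDeallocator notify_guard(jvmti_env, usage.notify_waiters);
    // Every subsequent early return then releases both arrays automatically.
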
diff --git a/test/ti-stress/stress.cc b/test/ti-stress/stress.cc
index 5d7c2f3..6e29e36 100644
--- a/test/ti-stress/stress.cc
+++ b/test/ti-stress/stress.cc
@@ -28,15 +28,31 @@
#include "jvmti.h"
#include "utils.h"
+#pragma clang diagnostic push
+// slicer defines its own CHECK. b/65422458
+#pragma push_macro("CHECK")
+#undef CHECK
+
+// Slicer's headers have code that triggers these warnings. b/65298177
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#include "code_ir.h"
+#include "control_flow_graph.h"
+#include "dex_ir.h"
+#include "dex_ir_builder.h"
+#include "instrumentation.h"
+#include "reader.h"
+#include "writer.h"
+
+#pragma pop_macro("CHECK")
+#pragma clang diagnostic pop
+
namespace art {
// Should we do a 'full_rewrite' with this test?
static constexpr bool kDoFullRewrite = true;
struct StressData {
- std::string dexter_cmd;
- std::string out_temp_dex;
- std::string in_temp_dex;
bool vm_class_loader_initialized;
bool trace_stress;
bool redefine_stress;
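
The CHECK push/pop guard added above is a general pattern for including a third-party header whose macros collide with ones already in scope. A self-contained illustration, with a hypothetical header and macro body:

    #define CHECK(x) MyCheckImpl(x)   // our project's CHECK (hypothetical)

    #pragma push_macro("CHECK")       // save the current definition
    #undef CHECK                      // let the third-party header define its own
    #include "third_party.h"          // hypothetical header with a conflicting CHECK
    #pragma pop_macro("CHECK")        // restore the saved definition

    // From here on, CHECK(x) expands to MyCheckImpl(x) again.
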
@@ -44,51 +60,60 @@
bool step_stress;
};
-static void WriteToFile(const std::string& fname, jint data_len, const unsigned char* data) {
- std::ofstream file(fname, std::ios::binary | std::ios::out | std::ios::trunc);
- file.write(reinterpret_cast<const char*>(data), data_len);
- file.flush();
-}
-
-static bool ReadIntoBuffer(const std::string& fname, /*out*/std::vector<unsigned char>* data) {
- std::ifstream file(fname, std::ios::binary | std::ios::in);
- file.seekg(0, std::ios::end);
- size_t len = file.tellg();
- data->resize(len);
- file.seekg(0);
- file.read(reinterpret_cast<char*>(data->data()), len);
- return len != 0;
-}
-
-// TODO rewrite later.
-static bool DoExtractClassFromData(StressData* data,
- const std::string& class_name,
+static bool DoExtractClassFromData(jvmtiEnv* env,
+ const std::string& descriptor,
jint in_len,
const unsigned char* in_data,
- /*out*/std::vector<unsigned char>* dex) {
- // Write the dex file into a temporary file.
- WriteToFile(data->in_temp_dex, in_len, in_data);
- // Clear out file so even if something suppresses the exit value we will still detect dexter
- // failure.
- WriteToFile(data->out_temp_dex, 0, nullptr);
- // Have dexter do the extraction.
- std::vector<std::string> args;
- args.push_back(data->dexter_cmd);
- if (kDoFullRewrite) {
- args.push_back("-x");
- args.push_back("full_rewrite");
- }
- args.push_back("-e");
- args.push_back(class_name);
- args.push_back("-o");
- args.push_back(data->out_temp_dex);
- args.push_back(data->in_temp_dex);
- std::string error;
- if (ExecAndReturnCode(args, &error) != 0) {
- LOG(ERROR) << "unable to execute dexter: " << error;
+ /*out*/jint* out_len,
+ /*out*/unsigned char** out_data) {
+ dex::Reader reader(in_data, in_len);
+ dex::u4 class_idx = reader.FindClassIndex(descriptor.c_str());
+ if (class_idx != dex::kNoIndex) {
+ reader.CreateClassIr(class_idx);
+ } else {
+ LOG(ERROR) << "ERROR: Can't find class " << descriptor;
return false;
}
- return ReadIntoBuffer(data->out_temp_dex, dex);
+ auto dex_ir = reader.GetIr();
+
+ if (kDoFullRewrite) {
+ for (auto& ir_method : dex_ir->encoded_methods) {
+ if (ir_method->code != nullptr) {
+ lir::CodeIr code_ir(ir_method.get(), dex_ir);
+ lir::ControlFlowGraph cfg_compact(&code_ir, false);
+ lir::ControlFlowGraph cfg_verbose(&code_ir, true);
+ code_ir.Assemble();
+ }
+ }
+ }
+ dex::Writer writer(dex_ir);
+
+ struct Allocator : public dex::Writer::Allocator {
+ explicit Allocator(jvmtiEnv* jvmti_env) : jvmti_env_(jvmti_env) {}
+ virtual void* Allocate(size_t size) {
+ unsigned char* out = nullptr;
+ if (JVMTI_ERROR_NONE != jvmti_env_->Allocate(size, &out)) {
+ return nullptr;
+ } else {
+ return out;
+ }
+ }
+ virtual void Free(void* ptr) {
+ jvmti_env_->Deallocate(reinterpret_cast<unsigned char*>(ptr));
+ }
+ private:
+ jvmtiEnv* jvmti_env_;
+ };
+ Allocator alloc(env);
+ size_t res_len = 0;
+ unsigned char* res = writer.CreateImage(&alloc, &res_len);
+ if (res != nullptr) {
+ *out_data = res;
+ *out_len = static_cast<jint>(res_len);
+ return true;
+ } else {
+ return false;
+ }
}
class ScopedThreadInfo {
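
Stripped of the JVMTI plumbing, the rewritten DoExtractClassFromData is an in-memory slicer round-trip. The sketch below uses only the slicer calls visible in this patch; class_bytes/class_bytes_len are assumed inputs, alloc is a JVMTI-backed dex::Writer::Allocator like the one defined above, and error handling is elided:

    dex::Reader reader(class_bytes, class_bytes_len);     // parse the incoming dex
    dex::u4 class_idx = reader.FindClassIndex("LMain;");  // full descriptor form
    if (class_idx == dex::kNoIndex) {
      return false;                                       // class not in this dex
    }
    reader.CreateClassIr(class_idx);                      // build IR for just that class
    auto dex_ir = reader.GetIr();
    dex::Writer writer(dex_ir);
    size_t new_len = 0;
    unsigned char* new_bytes = writer.CreateImage(&alloc, &new_len);
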
@@ -615,10 +640,10 @@
jint* new_class_data_len,
unsigned char** new_class_data) {
std::vector<unsigned char> out;
- std::string name_str(name);
- // Make the jvmti semi-descriptor into the java style descriptor (though with $ for inner
- // classes).
- std::replace(name_str.begin(), name_str.end(), '/', '.');
+ // Turn the JVMTI class name (a "semi-descriptor" like java/lang/Object) into a full descriptor (Ljava/lang/Object;).
+ std::string name_str("L");
+ name_str += name;
+ name_str += ";";
StressData* data = nullptr;
CHECK_EQ(jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)),
JVMTI_ERROR_NONE);
@@ -626,15 +651,11 @@
LOG(WARNING) << "Ignoring load of class " << name << " because VMClassLoader is not yet "
<< "initialized. Transforming this class could cause spurious test failures.";
return;
- } else if (DoExtractClassFromData(data, name_str, class_data_len, class_data, /*out*/ &out)) {
+ } else if (DoExtractClassFromData(jvmti, name_str, class_data_len, class_data,
+ /*out*/ new_class_data_len, /*out*/ new_class_data)) {
LOG(INFO) << "Extracted class: " << name;
- unsigned char* new_data;
- CHECK_EQ(JVMTI_ERROR_NONE, jvmti->Allocate(out.size(), &new_data));
- memcpy(new_data, out.data(), out.size());
- *new_class_data_len = static_cast<jint>(out.size());
- *new_class_data = new_data;
} else {
- std::cerr << "Unable to extract class " << name_str << std::endl;
+ std::cerr << "Unable to extract class " << name << std::endl;
*new_class_data_len = 0;
*new_class_data = nullptr;
}
@@ -653,7 +674,7 @@
}
// Options are
-// jvmti-stress,[redefine,${DEXTER_BINARY},${TEMP_FILE_1},${TEMP_FILE_2},][trace,][field]
+// jvmti-stress,[redefine,][trace,][field]
static void ReadOptions(StressData* data, char* options) {
std::string ops(options);
CHECK_EQ(GetOption(ops), "jvmti-stress") << "Options should start with jvmti-stress";
@@ -668,12 +689,6 @@
data->field_stress = true;
} else if (cur == "redefine") {
data->redefine_stress = true;
- ops = AdvanceOption(ops);
- data->dexter_cmd = GetOption(ops);
- ops = AdvanceOption(ops);
- data->in_temp_dex = GetOption(ops);
- ops = AdvanceOption(ops);
- data->out_temp_dex = GetOption(ops);
} else {
LOG(FATAL) << "Unknown option: " << GetOption(ops);
}
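
One reason the patch routes slicer's output through a JVMTI-backed allocator instead of malloc or new: the ClassFileLoadHook contract in the JVMTI spec requires that replacement bytes returned through new_class_data be allocated with the environment's Allocate(), since the VM frees them afterwards with Deallocate(). Hand-rolled for illustration, with transformed_bytes and len as assumed inputs:

    unsigned char* out = nullptr;
    if (jvmti->Allocate(len, &out) == JVMTI_ERROR_NONE) {
      memcpy(out, transformed_bytes, len);           // copy the rewritten class file
      *new_class_data = out;                         // the VM takes ownership
      *new_class_data_len = static_cast<jint>(len);
    }
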
diff --git a/tools/libcore_gcstress_debug_failures.txt b/tools/libcore_gcstress_debug_failures.txt
index 5806b61..d27b8fc 100644
--- a/tools/libcore_gcstress_debug_failures.txt
+++ b/tools/libcore_gcstress_debug_failures.txt
@@ -11,9 +11,11 @@
names: ["jsr166.CompletableFutureTest#testCompleteOnTimeout_completed",
"libcore.icu.TransliteratorTest#testAll",
"libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
+ "libcore.icu.RelativeDateTimeFormatterTest#test_bug25883157",
"libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndTimeout",
"libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndNoTimeout",
"libcore.java.util.TimeZoneTest#testSetDefaultDeadlock",
+ "libcore.javax.crypto.CipherBasicsTest#testBasicEncryption",
"org.apache.harmony.tests.java.util.TimerTest#testThrowingTaskKillsTimerThread"]
}
]