Merge "Quick compiler: fix compile-time perf regression"
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 6135571..188ddb5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -110,6 +110,12 @@
DALVIKVM_FLAGS := -Xcompiler-option --compiler-backend=Optimizing
endif
+#
+# Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS.
+#
+ART_DEFAULT_GC_TYPE ?= CMS
+ART_DEFAULT_GC_TYPE_CFLAGS := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE)
+
LLVM_ROOT_PATH := external/llvm
# Don't fail a dalvik minimal host build.
-include $(LLVM_ROOT_PATH)/llvm.mk
@@ -237,6 +243,7 @@
ART_HOST_CFLAGS := $(art_cflags) -DANDROID_SMP=1 -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
+ART_HOST_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
ART_TARGET_CFLAGS := $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
ifeq ($(TARGET_CPU_SMP),true)
@@ -244,6 +251,7 @@
else
ART_TARGET_CFLAGS += -DANDROID_SMP=0
endif
+ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
# DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is set in ../build/core/dex_preopt.mk based on
# the TARGET_CPU_VARIANT
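The define produced above is consumed by the preprocessor in runtime/parsed_options.cc (see that hunk below). A minimal standalone sketch of the mechanism, not part of the patch: in #if expressions an undefined identifier evaluates to 0, so exactly one branch survives preprocessing.

    // Built with the flag the makefile emits, e.g. -DART_DEFAULT_GC_TYPE_IS_SS.
    #if ART_DEFAULT_GC_TYPE_IS_CMS
    static const char* kDefaultGc = "CMS";   // Sticky + Partial + Full CMS
    #elif ART_DEFAULT_GC_TYPE_IS_SS
    static const char* kDefaultGc = "SS";    // Semi-space
    #elif ART_DEFAULT_GC_TYPE_IS_GSS
    static const char* kDefaultGc = "GSS";   // Generational semi-space
    #else
    #error "ART default GC type must be set"
    #endif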
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index faa9461..8fcb09b 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -139,12 +139,25 @@
}
RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
- rl_src = EvalLoc(rl_src, op_kind, false);
- if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
- LoadValueDirect(rl_src, rl_src.reg);
- rl_src.location = kLocPhysReg;
- MarkLive(rl_src);
+ rl_src = UpdateLoc(rl_src);
+ if (rl_src.location == kLocPhysReg) {
+ if (!RegClassMatches(op_kind, rl_src.reg)) {
+ // Wrong register class. Reallocate, copy, and transfer ownership.
+ RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
+ OpRegCopy(new_reg, rl_src.reg);
+ // Associate the old sreg with the new register and clobber the old register.
+ GetRegInfo(new_reg)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+ Clobber(rl_src.reg);
+ rl_src.reg = new_reg;
+ }
+ return rl_src;
}
+
+ DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+ rl_src.reg = AllocTypedTemp(rl_src.fp, op_kind);
+ LoadValueDirect(rl_src, rl_src.reg);
+ rl_src.location = kLocPhysReg;
+ MarkLive(rl_src);
return rl_src;
}
@@ -203,12 +216,26 @@
RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) {
DCHECK(rl_src.wide);
- rl_src = EvalLoc(rl_src, op_kind, false);
- if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
- LoadValueDirectWide(rl_src, rl_src.reg);
- rl_src.location = kLocPhysReg;
- MarkLive(rl_src);
+ rl_src = UpdateLocWide(rl_src);
+ if (rl_src.location == kLocPhysReg) {
+ if (!RegClassMatches(op_kind, rl_src.reg)) {
+ // Wrong register class. Reallocate, copy, and transfer ownership.
+ RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
+ OpRegCopyWide(new_regs, rl_src.reg);
+ // Associate the old sreg with the new register and clobber the old register.
+ GetRegInfo(new_regs)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+ Clobber(rl_src.reg);
+ rl_src.reg = new_regs;
+ }
+ return rl_src;
}
+
+ DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+ DCHECK_NE(GetSRegHi(rl_src.s_reg_low), INVALID_SREG);
+ rl_src.reg = AllocTypedTempWide(rl_src.fp, op_kind);
+ LoadValueDirectWide(rl_src, rl_src.reg);
+ rl_src.location = kLocPhysReg;
+ MarkLive(rl_src);
return rl_src;
}
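The fix in both LoadValue variants hinges on the different contracts of the two helpers; a sketch of the distinction (paraphrasing the Mir2Lir declarations, not their verbatim doc comments):

    // UpdateLoc: refresh the location record only - reports whether the value
    // already lives in a physical register, but never allocates one.
    RegLocation UpdateLoc(RegLocation loc);

    // EvalLoc: prepare a location to receive a value - may allocate a typed
    // temporary even when the caller turns out not to need it; avoiding that
    // allocation on the fast path is the compile-time win here.
    RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);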
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index eebd554..3016cd1 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -681,9 +681,9 @@
RegLocation UpdateRawLoc(RegLocation loc);
/**
- * @brief Used to load register location into a typed temporary or pair of temporaries.
+ * @brief Used to prepare a register location to receive a wide value.
* @see EvalLoc
- * @param loc The register location to load from.
+ * @param loc The location where the value will be stored.
* @param reg_class Type of register needed.
* @param update Whether the liveness information should be updated.
* @return Returns the properly typed temporary in physical register pairs.
@@ -691,8 +691,8 @@
RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
/**
- * @brief Used to load register location into a typed temporary.
- * @param loc The register location to load from.
+ * @brief Used to prepare a register location to receive a value.
+ * @param loc The location where the value will be stored.
* @param reg_class Type of register needed.
* @param update Whether the liveness information should be updated.
* @return Returns the properly typed temporary in physical register.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 5e6e73b..bcc077b 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -954,9 +954,8 @@
/* If already in registers, we can assume proper form. Right reg class? */
if (loc.location == kLocPhysReg) {
if (!RegClassMatches(reg_class, loc.reg)) {
- /* Wrong register class. Reallocate and copy */
+ // Wrong register class. Reallocate and transfer ownership.
RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
- OpRegCopyWide(new_regs, loc.reg);
// Associate the old sreg with the new register and clobber the old register.
GetRegInfo(new_regs)->SetSReg(GetRegInfo(loc.reg)->SReg());
Clobber(loc.reg);
@@ -988,9 +987,8 @@
if (loc.location == kLocPhysReg) {
if (!RegClassMatches(reg_class, loc.reg)) {
- /* Wrong register class. Realloc, copy and transfer ownership */
+ // Wrong register class. Reallocate and transfer ownership.
RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
- OpRegCopy(new_reg, loc.reg);
// Associate the old sreg with the new register and clobber the old register.
GetRegInfo(new_reg)->SetSReg(GetRegInfo(loc.reg)->SReg());
Clobber(loc.reg);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 698fce4..b6e0841 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1196,7 +1196,7 @@
if (rl_src.location == kLocPhysReg) {
// Both operands are in registers.
// But we must ensure that rl_src is in a pair
- rl_src = EvalLocWide(rl_src, kCoreReg, true);
+ rl_src = LoadValueWide(rl_src, kCoreReg);
if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
// The registers are the same, so we would clobber it before the use.
RegStorage temp_reg = AllocTemp();
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index df5aa7b..979f516 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -22,14 +22,14 @@
/*
* 16-bit representation of the physical register container holding a Dalvik value.
- * The encoding allows up to 32 physical elements per storage class, and supports eight
+ * The encoding allows up to 64 physical elements per storage class, and supports eight
* register container shapes.
*
- * [V] [D] [HHHHH] [SSS] [F] [LLLLL]
+ * [V] [HHHHH] [SSS] [F] [LLLLLL]
*
- * [LLLLL]
+ * [LLLLLL]
* Physical register number for the low or solo register.
- * 0..31
+ * 0..63
*
* [F]
* Describes type of the [LLLLL] register.
@@ -51,19 +51,13 @@
* Physical register number of the high register (valid only for register pair).
* 0..31
*
- * [D]
- * Describes type of the [HHHHH] register (valid only for register pair).
- * 0: Core
- * 1: Floating point
- *
* [V]
* 0 -> Invalid
* 1 -> Valid
*
* Note that in all non-invalid cases, we can determine if the storage is floating point
- * by testing bit 6. Though a mismatch appears to be permitted by the format, the [F][D] values
- * from each half of a pair must match (this allows the high and low regs of a pair to be more
- * easily individually manipulated).
+ * by testing bit 7. Note also that a register pair is effectively limited to a pair of
+ * physical register numbers in the 0..31 range.
*
* On some target architectures, the same underlying physical register container can be given
* different views. For example, Arm's 32-bit single-precision floating point registers
@@ -82,30 +76,30 @@
kValidMask = 0x8000,
kValid = 0x8000,
kInvalid = 0x0000,
- kShapeMask = 0x01c0,
- k32BitSolo = 0x0040,
- k64BitSolo = 0x0080,
- k64BitPair = 0x00c0,
- k128BitSolo = 0x0100,
- k256BitSolo = 0x0140,
- k512BitSolo = 0x0180,
- k1024BitSolo = 0x01c0,
- k64BitMask = 0x0180,
- k64Bits = 0x0080,
- kShapeTypeMask = 0x01e0,
- kFloatingPoint = 0x0020,
+ kShapeMask = 0x0380,
+ k32BitSolo = 0x0080,
+ k64BitSolo = 0x0100,
+ k64BitPair = 0x0180,
+ k128BitSolo = 0x0200,
+ k256BitSolo = 0x0280,
+ k512BitSolo = 0x0300,
+ k1024BitSolo = 0x0380,
+ k64BitMask = 0x0300,
+ k64Bits = 0x0100,
+ kShapeTypeMask = 0x03c0,
+ kFloatingPoint = 0x0040,
kCoreRegister = 0x0000,
};
- static const uint16_t kRegValMask = 0x01ff; // Num, type and shape.
- static const uint16_t kRegTypeMask = 0x003f; // Num and type.
- static const uint16_t kRegNumMask = 0x001f; // Num only.
+ static const uint16_t kRegValMask = 0x03ff; // Num, type and shape.
+ static const uint16_t kRegTypeMask = 0x007f; // Num and type.
+ static const uint16_t kRegNumMask = 0x003f; // Num only.
+ static const uint16_t kHighRegNumMask = 0x001f; // 0..31 for high reg
static const uint16_t kMaxRegs = kRegValMask + 1;
- // TODO: deprecate use of kInvalidRegVal and speed up GetReg().
- static const uint16_t kInvalidRegVal = 0x01ff;
- static const uint16_t kHighRegShift = 9;
- static const uint16_t kShapeMaskShift = 6;
- static const uint16_t kHighRegMask = (kRegTypeMask << kHighRegShift);
+ // TODO: deprecate use of kInvalidRegVal and speed up GetReg(). Rely on valid bit instead.
+ static const uint16_t kInvalidRegVal = 0x03ff;
+ static const uint16_t kHighRegShift = 10;
+ static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift);
// Reg is [F][LLLLL], will override any existing shape and use rs_kind.
RegStorage(RegStorageKind rs_kind, int reg) {
@@ -116,7 +110,9 @@
RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) {
DCHECK_EQ(rs_kind, k64BitPair);
DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint);
- reg_ = kValid | rs_kind | ((high_reg & kRegTypeMask) << kHighRegShift) | (low_reg & kRegTypeMask);
+ DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31";
+ reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
+ (low_reg & kRegTypeMask);
}
constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
RegStorage() : reg_(kInvalid) {}
@@ -206,7 +202,7 @@
// Retrieve the most significant register of a pair.
int GetHighReg() const {
DCHECK(IsPair());
- return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift);
+ return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift) | (reg_ & kFloatingPoint);
}
// Create a stand-alone RegStorage from the high reg of a pair.
@@ -217,7 +213,7 @@
void SetHighReg(int reg) {
DCHECK(IsPair());
- reg_ = (reg_ & ~kHighRegMask) | ((reg & kRegTypeMask) << kHighRegShift);
+ reg_ = (reg_ & ~kHighRegMask) | ((reg & kHighRegNumMask) << kHighRegShift);
}
// Return the register number of low or solo.
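To make the new layout concrete, a standalone sketch (not part of the patch) that assembles a core 64-bit pair from the constants introduced above, with low reg 2 and high reg 3:

    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uint16_t kValid          = 0x8000;
      constexpr uint16_t k64BitPair      = 0x0180;
      constexpr uint16_t kRegTypeMask    = 0x007f;  // 6-bit num plus the F bit
      constexpr uint16_t kHighRegNumMask = 0x001f;  // high reg limited to 0..31
      constexpr uint16_t kHighRegShift   = 10;

      int low_reg = 2, high_reg = 3;
      uint16_t reg = kValid | k64BitPair |
                     ((high_reg & kHighRegNumMask) << kHighRegShift) |
                     (low_reg & kRegTypeMask);
      assert(reg == 0x8d82);  // [V]=1, high=3, shape=pair, F=0, low=2
      return 0;
    }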
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index fb909a8..d03b99f 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -62,18 +62,15 @@
switch (abi) {
case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI.
- // FIXME IPx used by VIXL - this is unsafe.
__ JumpTo(Arm64ManagedRegister::FromCoreRegister(X0), Offset(offset.Int32Value()),
Arm64ManagedRegister::FromCoreRegister(IP1));
break;
case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (X0).
-
__ LoadRawPtr(Arm64ManagedRegister::FromCoreRegister(IP1),
Arm64ManagedRegister::FromCoreRegister(X0),
Offset(JNIEnvExt::SelfOffset().Int32Value()));
- // FIXME IPx used by VIXL - this is unsafe.
__ JumpTo(Arm64ManagedRegister::FromCoreRegister(IP1), Offset(offset.Int32Value()),
Arm64ManagedRegister::FromCoreRegister(IP0));
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index b4bb979..f486b3c 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -50,11 +50,11 @@
}
void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
- ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(TR1));
+ ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(ETR));
}
void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
- StoreToOffset(TR1, SP, offset.Int32Value());
+ StoreToOffset(ETR, SP, offset.Int32Value());
}
// See Arm64 PCS Section 5.2.2.1.
@@ -79,11 +79,13 @@
// VIXL macro-assembler handles all variants.
___ Add(reg_x(rd), reg_x(rn), value);
} else {
- // ip1 = rd + value
- // rd = cond ? ip1 : rn
- CHECK_NE(rn, IP1);
- ___ Add(reg_x(IP1), reg_x(rn), value);
- ___ Csel(reg_x(rd), reg_x(IP1), reg_x(rd), COND_OP(cond));
+ // temp = rd + value
+ // rd = cond ? temp : rn
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ temps.Exclude(reg_x(rd), reg_x(rn));
+ vixl::Register temp = temps.AcquireX();
+ ___ Add(temp, reg_x(rn), value);
+ ___ Csel(reg_x(rd), temp, reg_x(rd), COND_OP(cond));
}
}
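The hunks in this file replace hard-coded IP1 scratch usage with VIXL's scratch-register pool. A condensed sketch of the idiom, assuming VIXL headers and a live MacroAssembler (the function name is illustrative):

    void ConditionalAddSketch(vixl::MacroAssembler* masm,
                              const vixl::Register& rd,
                              const vixl::Register& rn,
                              int64_t value,
                              vixl::Condition cond) {
      vixl::UseScratchRegisterScope temps(masm);  // borrows from the pool (IP0/IP1 by default)
      temps.Exclude(rd, rn);                      // the operands must never be handed out
      vixl::Register temp = temps.AcquireX();     // 64-bit scratch, released at scope exit
      masm->Add(temp, rn, value);                 // temp = rn + value
      masm->Csel(rd, temp, rd, cond);             // rd = cond ? temp : rd
    }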
@@ -162,7 +164,7 @@
Arm64ManagedRegister scratch = m_scratch.AsArm64();
CHECK(scratch.IsCoreRegister()) << scratch;
LoadImmediate(scratch.AsCoreRegister(), imm);
- StoreToOffset(scratch.AsCoreRegister(), TR1, offs.Int32Value());
+ StoreToOffset(scratch.AsCoreRegister(), ETR, offs.Int32Value());
}
void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
@@ -171,13 +173,14 @@
Arm64ManagedRegister scratch = m_scratch.AsArm64();
CHECK(scratch.IsCoreRegister()) << scratch;
AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
- StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+ StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
}
void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
- // Arm64 does not support: "str sp, [dest]" therefore we use IP1 as a temp reg.
- ___ Mov(reg_x(IP1), reg_x(SP));
- StoreToOffset(IP1, TR1, tr_offs.Int32Value());
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ vixl::Register temp = temps.AcquireX();
+ ___ Mov(temp, reg_x(SP));
+ ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value()));
}
void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -195,12 +198,14 @@
if ((cond == AL) || (cond == NV)) {
___ Mov(reg_x(dest), value);
} else {
- // ip1 = value
- // rd = cond ? ip1 : rd
+ // temp = value
+ // rd = cond ? temp : rd
if (value != 0) {
- CHECK_NE(dest, IP1);
- ___ Mov(reg_x(IP1), value);
- ___ Csel(reg_x(dest), reg_x(IP1), reg_x(dest), COND_OP(cond));
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ temps.Exclude(reg_x(dest));
+ vixl::Register temp = temps.AcquireX();
+ ___ Mov(temp, value);
+ ___ Csel(reg_x(dest), temp, reg_x(dest), COND_OP(cond));
} else {
___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), COND_OP(cond));
}
@@ -276,7 +281,7 @@
}
void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
- return Load(m_dst.AsArm64(), TR1, src.Int32Value(), size);
+ return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size);
}
void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
@@ -298,13 +303,16 @@
Arm64ManagedRegister dst = m_dst.AsArm64();
Arm64ManagedRegister base = m_base.AsArm64();
CHECK(dst.IsCoreRegister() && base.IsCoreRegister());
- LoadFromOffset(dst.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+ // Remove dst and base from the temp list - the higher-level API uses IP1, IP0.
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ temps.Exclude(reg_x(dst.AsCoreRegister()), reg_x(base.AsCoreRegister()));
+ ___ Ldr(reg_x(dst.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
}
void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
Arm64ManagedRegister dst = m_dst.AsArm64();
CHECK(dst.IsCoreRegister()) << dst;
- LoadFromOffset(dst.AsCoreRegister(), TR1, offs.Int32Value());
+ LoadFromOffset(dst.AsCoreRegister(), ETR, offs.Int32Value());
}
// Copying routines.
@@ -342,7 +350,7 @@
ManagedRegister m_scratch) {
Arm64ManagedRegister scratch = m_scratch.AsArm64();
CHECK(scratch.IsCoreRegister()) << scratch;
- LoadFromOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+ LoadFromOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
StoreToOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
}
@@ -352,7 +360,7 @@
Arm64ManagedRegister scratch = m_scratch.AsArm64();
CHECK(scratch.IsCoreRegister()) << scratch;
LoadFromOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
- StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+ StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
}
void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
@@ -511,7 +519,10 @@
Arm64ManagedRegister scratch = m_scratch.AsArm64();
CHECK(base.IsCoreRegister()) << base;
CHECK(scratch.IsCoreRegister()) << scratch;
- LoadFromOffset(scratch.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+ // Remove base and scratch from the temp list - the higher-level API uses IP1, IP0.
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ temps.Exclude(reg_x(base.AsCoreRegister()), reg_x(scratch.AsCoreRegister()));
+ ___ Ldr(reg_x(scratch.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
___ Br(reg_x(scratch.AsCoreRegister()));
}
@@ -595,13 +606,17 @@
Arm64ManagedRegister scratch = m_scratch.AsArm64();
Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
exception_blocks_.push_back(current_exception);
- LoadFromOffset(scratch.AsCoreRegister(), TR1, Thread::ExceptionOffset<8>().Int32Value());
+ LoadFromOffset(scratch.AsCoreRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
___ B(current_exception->Entry(), COND_OP(NE));
}
void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
- // Bind exception poll entry.
+ vixl::UseScratchRegisterScope temps(vixl_masm_);
+ temps.Exclude(reg_x(exception->scratch_.AsCoreRegister()));
+ vixl::Register temp = temps.AcquireX();
+
+ // Bind exception poll entry.
___ Bind(exception->Entry());
if (exception->stack_adjust_ != 0) { // Fix up the frame.
DecreaseFrameSize(exception->stack_adjust_);
@@ -609,12 +624,14 @@
// Pass exception object as argument.
// Don't care about preserving X0 as this won't return.
___ Mov(reg_x(X0), reg_x(exception->scratch_.AsCoreRegister()));
- LoadFromOffset(IP1, TR1, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+ ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
- // FIXME: Temporary fix for TR (XSELF).
- ___ Mov(reg_x(TR), reg_x(TR1));
+ // Move ETR (callee-saved) back to TR (caller-saved). We use ETR on calls
+ // to external functions that might trash TR. We do not need the original
+ // X19 saved in BuildFrame().
+ ___ Mov(reg_x(TR), reg_x(ETR));
- ___ Blr(reg_x(IP1));
+ ___ Blr(temp);
// Call should never return.
___ Brk();
}
@@ -634,8 +651,10 @@
CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
___ PushCalleeSavedRegisters();
- // FIXME: Temporary fix for TR (XSELF).
- ___ Mov(reg_x(TR1), reg_x(TR));
+ // Move TR (caller-saved) to ETR (callee-saved). The original X19 has been
+ // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not
+ // trashed by native code.
+ ___ Mov(reg_x(ETR), reg_x(TR));
// Increase frame to required size - must be at least space to push Method*.
CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
@@ -681,8 +700,10 @@
size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
DecreaseFrameSize(adjust);
- // FIXME: Temporary fix for TR (XSELF).
- ___ Mov(reg_x(TR), reg_x(TR1));
+ // We move ETR (callee-saved) back to TR (caller-saved), which might have
+ // been trashed during the native call. The original X19 (ETR) is restored as
+ // part of PopCalleeSavedRegisters().
+ ___ Mov(reg_x(TR), reg_x(ETR));
// Pop callee saved and return to LR.
___ PopCalleeSavedRegisters();
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 97fb93a..583150c 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -85,6 +85,7 @@
vixl_masm_(new vixl::MacroAssembler(vixl_buf_, kBufferSizeArm64)) {}
virtual ~Arm64Assembler() {
+ delete vixl_masm_;
delete[] vixl_buf_;
}
@@ -237,8 +238,8 @@
// Vixl buffer.
byte* vixl_buf_;
- // Unique ptr - vixl assembler.
- UniquePtr<vixl::MacroAssembler> vixl_masm_;
+ // Vixl assembler.
+ vixl::MacroAssembler* vixl_masm_;
// List of exception blocks to generate at the end of the code cache.
std::vector<Arm64Exception*> exception_blocks_;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8079460..f7cb254 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -443,35 +443,32 @@
DELIVER_PENDING_EXCEPTION
.endm
-// FIXME: Temporary fix for TR(XSELF).
.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
- mov x0, x19 // pass Thread::Current
+ mov x0, xSELF // pass Thread::Current
mov x1, sp // pass SP
b \cxx_name // \cxx_name(Thread*, SP)
END \c_name
.endm
-// FIXME: Temporary fix for TR(XSELF).
.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context.
- mov x1, x19 // pass Thread::Current.
+ mov x1, xSELF // pass Thread::Current.
mov x2, sp // pass SP.
b \cxx_name // \cxx_name(arg, Thread*, SP).
brk 0
END \c_name
.endm
-// FIXME: Temporary fix for TR(XSELF).
.macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
- mov x2, x19 // pass Thread::Current
+ mov x2, xSELF // pass Thread::Current
mov x3, sp // pass SP
b \cxx_name // \cxx_name(arg1, arg2, Thread*, SP)
brk 0
@@ -991,7 +988,6 @@
* failure.
*/
.extern artHandleFillArrayDataFromCode
-// TODO: xSELF -> x19.
ENTRY art_quick_handle_fill_data
SETUP_REF_ONLY_CALLEE_SAVE_FRAME // Save callee saves in case exception allocation triggers GC.
mov x2, xSELF // Pass Thread::Current.
@@ -1166,12 +1162,7 @@
brk 0 // Unreached.
END art_quick_aput_obj
-UNIMPLEMENTED art_quick_initialize_static_storage
-UNIMPLEMENTED art_quick_initialize_type
-UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-
// Macro to facilitate adding new allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
.macro TWO_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
@@ -1186,7 +1177,6 @@
.endm
// Macro to facilitate adding new array allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
.macro THREE_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
@@ -1244,6 +1234,16 @@
END \name
.endm
+ /*
+ * Entry from managed code when static storage is uninitialized. This stub will run the
+ * class initializer and deliver the exception on error. On success the static storage
+ * base is returned.
+ */
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
+UNIMPLEMENTED art_quick_initialize_type
+UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1273,8 +1273,13 @@
RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_set64_static
-
-UNIMPLEMENTED art_quick_resolve_string
+ /*
+ * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
+ * exception on error. On success the String is returned. x0 holds the referring method,
+ * w1 holds the string index. The fast path check for hit in strings cache has already been
+ * performed.
+ */
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
// Generate the allocation entrypoints for each allocator.
GENERATE_ALL_ALLOC_ENTRYPOINTS
@@ -1293,7 +1298,7 @@
mov x2, xSELF // pass Thread::Current
mov x3, sp // pass SP
bl artQuickProxyInvokeHandler // (Method* proxy method, receiver, Thread*, SP)
- ldr xSELF, [sp, #200] // Restore self pointer.
+ ldr xSELF, [sp, #200] // Restore self pointer.
ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
cbnz x2, .Lexception_in_proxy // success if no exception is pending
RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME_NO_D0 // keep d0
@@ -1308,14 +1313,13 @@
ENTRY art_quick_resolution_trampoline
SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
- mov x19, x0 // save the called method
mov x2, xSELF
mov x3, sp
bl artQuickResolutionTrampoline // (called, receiver, Thread*, SP)
- mov x9, x0 // Remember returned code pointer in x9.
- mov x0, x19 // Restore the method, before x19 is restored to on-call value
+ cbz x0, 1f
+ mov x9, x0 // Remember returned code pointer in x9.
+ ldr x0, [sp, #0] // artQuickResolutionTrampoline puts called method in *SP.
RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
- cbz x9, 1f
br x9
1:
RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 2503918..ea346e0 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -56,8 +56,8 @@
X29 = 29,
X30 = 30,
X31 = 31,
- TR = 18, // ART Thread Register - Needs to be one of the callee saved regs.
- TR1 = 19, // FIXME!
+ TR = 18, // ART Thread Register - Managed Runtime (caller-saved)
+ ETR = 19, // ART Thread Register - External Calls (callee-saved)
IP0 = 16, // Used as scratch by VIXL.
IP1 = 17, // Used as scratch by ART JNI Assembler.
FP = 29,
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 1d05540..86f52aa 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -51,6 +51,11 @@
}
}
+ // Helper function needed since TEST_F makes a new class.
+ Thread::tls_ptr_sized_values* GetTlsPtr(Thread* self) {
+ return &self->tlsPtr_;
+ }
+
size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
// Push a transition back into managed code onto the linked list in thread.
ManagedStack fragment;
@@ -727,13 +732,6 @@
#endif
}
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_object_rosalloc(void);
-extern "C" void art_quick_alloc_object_resolved_rosalloc(void);
-extern "C" void art_quick_alloc_object_initialized_rosalloc(void);
-#endif
-
TEST_F(StubTest, AllocObject) {
TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
@@ -751,13 +749,12 @@
// Play with it...
EXPECT_FALSE(self->IsExceptionPending());
-
{
// Use an arbitrary method from c to use as referrer
size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()), // type_idx
reinterpret_cast<size_t>(c->GetVirtualMethod(0)), // arbitrary
0U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_object_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObject),
self);
EXPECT_FALSE(self->IsExceptionPending());
@@ -771,7 +768,7 @@
// We can use nullptr in the second argument as we do not need a method here (not used in
// resolved/initialized cases)
size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_object_resolved_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectResolved),
self);
EXPECT_FALSE(self->IsExceptionPending());
@@ -785,7 +782,7 @@
// We can use nullptr in the second argument as we do not need a method here (not used in
// resolved/initialized cases)
size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
self);
EXPECT_FALSE(self->IsExceptionPending());
@@ -842,7 +839,7 @@
self->ClearException();
size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
self);
EXPECT_TRUE(self->IsExceptionPending());
@@ -866,12 +863,6 @@
#endif
}
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_array_rosalloc(void);
-extern "C" void art_quick_alloc_array_resolved_rosalloc(void);
-#endif
-
TEST_F(StubTest, AllocObjectArray) {
TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
@@ -902,7 +893,7 @@
size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()), // type_idx
reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)), // arbitrary
10U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_array_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArray),
self);
EXPECT_FALSE(self->IsExceptionPending());
@@ -917,7 +908,7 @@
// We can use nullptr in the second argument as we do not need a method here (not used in
// resolved/initialized cases)
size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 10U,
- reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
self);
EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
@@ -937,7 +928,7 @@
{
size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr),
GB, // that should fail...
- reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+ reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
self);
EXPECT_TRUE(self->IsExceptionPending());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 84ca23b..ff3e7b6 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -181,8 +181,16 @@
parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
// Only the main GC thread, no workers.
conc_gc_threads_ = 0;
- // Default is CMS which is Sticky + Partial + Full CMS GC.
+ // The default GC type is set by the makefiles.
+#if ART_DEFAULT_GC_TYPE_IS_CMS
collector_type_ = gc::kCollectorTypeCMS;
+#elif ART_DEFAULT_GC_TYPE_IS_SS
+ collector_type_ = gc::kCollectorTypeSS;
+#elif ART_DEFAULT_GC_TYPE_IS_GSS
+ collector_type_ = gc::kCollectorTypeGSS;
+#else
+#error "ART default GC type must be set"
+#endif
// If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
// parsing options.
background_collector_type_ = gc::kCollectorTypeNone;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d78be92..5d4bf06 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -929,8 +929,8 @@
}
void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
- VisitConcurrentRoots(callback, arg, flags);
VisitNonConcurrentRoots(callback, arg);
+ VisitConcurrentRoots(callback, arg, flags);
}
mirror::ObjectArray<mirror::ArtMethod>* Runtime::CreateDefaultImt(ClassLinker* cl) {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5e64e59..6667419 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -270,7 +270,7 @@
void StackVisitor::SanityCheckFrame() const {
if (kIsDebugBuild) {
mirror::ArtMethod* method = GetMethod();
- CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
+ CHECK_EQ(method->GetClass(), mirror::ArtMethod::GetJavaLangReflectArtMethod());
if (cur_quick_frame_ != nullptr) {
method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
// Frame sanity.
diff --git a/runtime/thread.h b/runtime/thread.h
index 8c17082..32311e1 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1082,6 +1082,7 @@
friend class Runtime; // For CreatePeer.
friend class ScopedThreadStateChange;
friend class SignalCatcher; // For SetStateUnsafe.
+ friend class StubTest; // For accessing entrypoints.
friend class ThreadList; // For ~Thread and Destroy.
DISALLOW_COPY_AND_ASSIGN(Thread);