Implement TLAB fast paths in artAllocObjectFromCode.
GSS/TLAB GC speedup on N4 (ms):
MemAllocTest 2963 -> 2792
BinaryTrees 2205 -> 2113
Also, measured with -XX:IgnoreMaxFootprint to invoke GC less often
(only when the bump pointer space is filled rather than based on the
target utilization):
MemAllocTest 2707 -> 2590
BinaryTrees 2023 -> 1906
TODO: implement fast paths for array allocations.
Bug: 9986565
Change-Id: I73ff6327b229704f8ae5924ae9b747443c229841
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index cb0be04..7c73c79 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -37,6 +37,7 @@
// TODO: Fix no thread safety analysis when GCC can handle template specialization.
template <const bool kAccessCheck>
+ALWAYS_INLINE
static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
mirror::ArtMethod* method,
Thread* self, bool* slow_path) {
@@ -86,6 +87,7 @@
}
// TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE
static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
Thread* self,
bool* slow_path) {
@@ -116,6 +118,7 @@
// check.
// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
mirror::ArtMethod* method,
Thread* self,
@@ -135,6 +138,7 @@
// Given the context of a calling Method and a resolved class, create an instance.
// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
template <bool kInstrumented>
+ALWAYS_INLINE
static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
mirror::ArtMethod* method,
Thread* self,
@@ -157,6 +161,7 @@
// Given the context of a calling Method and an initialized class, create an instance.
// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
template <bool kInstrumented>
+ALWAYS_INLINE
static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
mirror::ArtMethod* method,
Thread* self,
@@ -169,6 +174,7 @@
// TODO: Fix no thread safety analysis when GCC can handle template specialization.
template <bool kAccessCheck>
+ALWAYS_INLINE
static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
mirror::ArtMethod* method,
int32_t component_count,
@@ -205,6 +211,7 @@
// check.
// TODO: Fix no thread safety analysis when GCC can handle template specialization.
template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
mirror::ArtMethod* method,
int32_t component_count,
@@ -227,6 +234,7 @@
}
template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
mirror::ArtMethod* method,
int32_t component_count,
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1f2713a..7d4da18 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -25,11 +25,34 @@
namespace art {
+static constexpr bool kUseTlabFastPath = true;
+
#define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
StackReference<mirror::ArtMethod>* sp) \
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+ if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+ mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx); \
+ if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
+ size_t byte_count = klass->GetObjectSize(); \
+ byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+ mirror::Object* obj; \
+ if (LIKELY(byte_count < self->TlabSize())) { \
+ obj = self->AllocTlab(byte_count); \
+ DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+ obj->SetClass(klass); \
+ if (kUseBakerOrBrooksReadBarrier) { \
+ if (kUseBrooksReadBarrier) { \
+ obj->SetReadBarrierPointer(obj); \
+ } \
+ obj->AssertReadBarrierPointer(); \
+ } \
+ QuasiAtomic::ThreadFenceForConstructor(); \
+ return obj; \
+ } \
+ } \
+ } \
FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
} \
@@ -37,6 +60,26 @@
mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
StackReference<mirror::ArtMethod>* sp) \
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+ if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+ if (LIKELY(klass->IsInitialized())) { \
+ size_t byte_count = klass->GetObjectSize(); \
+ byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+ mirror::Object* obj; \
+ if (LIKELY(byte_count < self->TlabSize())) { \
+ obj = self->AllocTlab(byte_count); \
+ DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+ obj->SetClass(klass); \
+ if (kUseBakerOrBrooksReadBarrier) { \
+ if (kUseBrooksReadBarrier) { \
+ obj->SetReadBarrierPointer(obj); \
+ } \
+ obj->AssertReadBarrierPointer(); \
+ } \
+ QuasiAtomic::ThreadFenceForConstructor(); \
+ return obj; \
+ } \
+ } \
+ } \
FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
} \
@@ -44,6 +87,24 @@
mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
StackReference<mirror::ArtMethod>* sp) \
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+ if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+ size_t byte_count = klass->GetObjectSize(); \
+ byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+ mirror::Object* obj; \
+ if (LIKELY(byte_count < self->TlabSize())) { \
+ obj = self->AllocTlab(byte_count); \
+ DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+ obj->SetClass(klass); \
+ if (kUseBakerOrBrooksReadBarrier) { \
+ if (kUseBrooksReadBarrier) { \
+ obj->SetReadBarrierPointer(obj); \
+ } \
+ obj->AssertReadBarrierPointer(); \
+ } \
+ QuasiAtomic::ThreadFenceForConstructor(); \
+ return obj; \
+ } \
+ } \
FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
} \
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 8ffadd5..a82392a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -662,7 +662,7 @@
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
template <bool kGrow>
- bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+ ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
// Returns true if the address passed in is within the address range of a continuous space.
bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const