Fix unnecessary read barrier for resolved/initialized allocations
The caller is responsible for the read barrier for these entrypoints.
No measured change in benchmarks; tested with MemAllocTest.
Bug: 12687968
Test: test-art-host CC baker, N6P booting with CC
Change-Id: If6b00fa2c31ff51b943dbbe6caea2ef0a3fa1ae0
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 202846a..3f87a14 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2107,7 +2107,7 @@
END art_quick_alloc_object_tlab
// The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier
ENTRY \name
// Fast path region tlab allocation.
// x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
@@ -2123,16 +2123,19 @@
ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
// Load the class (x2)
ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+ // If the class is null, go slow path. The check is required to read the lock word.
+ cbz w2, .Lslow_path\name
.endif
+.if \read_barrier
// Most common case: GC is not marking.
ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
cbnz x3, .Lmarking\name
+.endif
.Ldo_allocation\name:
\fast_path .Lslow_path\name
.Lmarking\name:
+.if \read_barrier
// GC is marking, check the lock word of the class for the mark bit.
- // If the class is null, go slow path. The check is required to read the lock word.
- cbz w2, .Lslow_path\name
// Class is not null, check mark bit in lock word.
ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
// If the bit is not zero, do the allocation.
@@ -2140,14 +2143,23 @@
// The read barrier slow path. Mark
// the class.
stp x0, x1, [sp, #-32]! // Save registers (x0, x1, lr).
+ .cfi_adjust_cfa_offset 32
+ .cfi_rel_offset x0, 0
+ .cfi_rel_offset x1, 8
str xLR, [sp, #16] // Align sp by 16 bytes.
+ .cfi_rel_offset xLR, 16
mov x0, x2 // Pass the class as the first param.
bl artReadBarrierMark
mov x2, x0 // Get the (marked) class back.
ldp x0, x1, [sp, #0] // Restore registers.
+ .cfi_restore x0
+ .cfi_restore x1
ldr xLR, [sp, #16]
+ .cfi_restore xLR
add sp, sp, #32
+ .cfi_adjust_cfa_offset -32
b .Ldo_allocation\name
+.endif
.Lslow_path\name:
SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
mov x2, xSELF // Pass Thread::Current.
@@ -2157,9 +2169,14 @@
END \name
.endm
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in
+// GENERATE_ALLOC_OBJECT_REGION_TLAB.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1
+// No read barrier for the resolved or initialized cases since the caller is responsible for the
+// read barrier due to the to-space invariant.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0
+
+// TODO: We could use this macro for the normal tlab allocator too.
// The common code for art_quick_alloc_array_*region_tlab
.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f941c52..06ff7ab 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1268,28 +1268,9 @@
int3
int3
#endif
+ // No read barrier since the caller is responsible for that.
movq %rdi, %rdx
- cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking:
- // Check the mark bit, if it is 1 avoid the read barrier.
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
- jnz .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path:
- // The read barrier slow path. Mark the class.
- PUSH rdi
- PUSH rsi
- subq LITERAL(8), %rsp // 16 byte alignment
- // Outgoing argument set up
- movq %rdx, %rdi // Pass the class as the first param.
- call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj)
- movq %rax, %rdx
- addq LITERAL(8), %rsp
- POP rsi
- POP rdi
- jmp .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab
@@ -1303,29 +1284,9 @@
int3
int3
#endif
- // Might need a special macro since rsi and edx is 32b/64b mismatched.
movq %rdi, %rdx
- cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit:
+ // No read barrier since the caller is responsible for that.
ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking:
- // Check the mark bit, if it is 1 avoid the read barrier.
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
- jnz .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path:
- // The read barrier slow path. Mark the class.
- PUSH rdi
- PUSH rsi
- subq LITERAL(8), %rsp // 16 byte alignment
- // Outgoing argument set up
- movq %rdx, %rdi // Pass the class as the first param.
- call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj)
- movq %rax, %rdx
- addq LITERAL(8), %rsp
- POP rsi
- POP rdi
- jmp .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab