Add a missing read barrier in an entrypoint stub

Also refactor some comments.
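
Under USE_READ_BARRIER, the x86 aput-object stub now loads the class of
the stored value through the READ_BARRIER macro (instead of a plain
MIRROR_OBJECT_CLASS_OFFSET load) before passing it to the slow path.
The x86_64 TLAB allocation fast path traps (int3) under USE_READ_BARRIER
until its read barrier fast path is implemented. The comment refactoring
makes the no-op "pass ref" comments in the read barrier stubs refer to
the macro parameters instead of hard-coded registers.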

Change-Id: I5c50f487bf9d71f1be5f6c8814bf039993fc1267
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f6d954f..d6396c1 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -609,7 +609,7 @@
     .cfi_rel_offset lr, 20
     sub sp, #8                      @ push padding
     .cfi_adjust_cfa_offset 8
-    @ mov r0, r0                    @ pass ref in r0 (no-op for now since parameter ref is unused)
+    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
     .ifnc \rObj, r1
         mov r1, \rObj               @ pass rObj
     .endif
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8ba3d43..bfef0fa 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1146,7 +1146,7 @@
     .cfi_rel_offset x4, 32
     .cfi_rel_offset x30, 40
 
-    // mov x0, x0                   // pass ref in x0 (no-op for now since parameter ref is unused)
+    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
     .ifnc \xObj, x1
         mov x1, \xObj               // pass xObj
     .endif
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4d5004f..88182dd 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -659,7 +659,7 @@
     sw     $a0, 0($sp)
     .cfi_rel_offset 4, 0
 
-    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    # move $a0, \rRef               # pass ref in a0 (no-op for now since parameter ref is unused)
     .ifnc \rObj, $a1
         move $a1, \rObj             # pass rObj
     .endif
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index c30e6ca..4bc049c 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -922,7 +922,7 @@
     sd     $a0, 0($sp)
     .cfi_rel_offset 4, 0
 
-    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    # move $a0, \rRef               # pass ref in a0 (no-op for now since parameter ref is unused)
     .ifnc \rObj, $a1
         move $a1, \rObj             # pass rObj
     .endif
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 1da5a2f..9b2d59d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1200,9 +1200,9 @@
     READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, false
     cmpl %eax, %ebx
     POP eax                      // restore eax from the push in the beginning of READ_BARRIER macro
+    // This asymmetric push/pop saves a push of eax and maintains stack alignment.
 #elif defined(USE_HEAP_POISONING)
     PUSH eax                     // save eax
-    // Cannot call READ_BARRIER macro here, because the above push messes up stack alignment.
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
     UNPOISON_HEAP_REF eax
     cmpl %eax, %ebx
@@ -1225,15 +1225,22 @@
     PUSH eax                      // save arguments
     PUSH ecx
     PUSH edx
+#if defined(USE_READ_BARRIER)
+    subl LITERAL(4), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(4)
+    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, true
+    subl LITERAL(4), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                      // pass arg2 - type of the value to be stored
+#elif defined(USE_HEAP_POISONING)
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-#ifdef USE_HEAP_POISONING
-    // This load does not need read barrier, since edx is unchanged and there's no GC safe point
-    // from last read of MIRROR_OBJECT_CLASS_OFFSET(%edx).
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // pass arg2 - type of the value to be stored
+    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
     UNPOISON_HEAP_REF eax
-    PUSH eax
+    PUSH eax                      // pass arg2 - type of the value to be stored
 #else
+    subl LITERAL(8), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl MIRROR_OBJECT_CLASS_OFFSET(%edx)  // pass arg2 - type of the value to be stored
     CFI_ADJUST_CFA_OFFSET(4)
 #endif
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f4c9488..88270d9 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -918,6 +918,13 @@
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
     // TODO: Add read barrier when this function is used.
+    // Note that this function can/should implement only the read barrier fast path
+    // (not the read barrier slow path), because this is the fast path of TLAB allocation.
+    // We can fall back to the allocation slow path to do the read barrier slow path.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     // Might need a special macro since rsi and edx is 32b/64b mismatched.
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
     UNPOISON_HEAP_REF edx
@@ -1165,7 +1172,7 @@
 END_MACRO
 
     /*
-     * Macro to insert read barrier, used in art_quick_aput_obj and art_quick_alloc_object_tlab.
+     * Macro to insert read barrier, used in art_quick_aput_obj.
      * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
      * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
      * 64b PUSH/POP and 32b argument.
@@ -1182,8 +1189,8 @@
     PUSH rcx
     SETUP_FP_CALLEE_SAVE_FRAME
     // Outgoing argument set up
-    // movl %edi, %edi                  // pass ref, no-op for now since parameter ref is unused
-    // // movq %rdi, %rdi
+    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
+    // // movq REG_VAR(ref_reg64), %rdi
     movl REG_VAR(obj_reg), %esi         // pass obj_reg
     // movq REG_VAR(obj_reg), %rsi
     movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 0a1d806..7361d34 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -558,7 +558,7 @@
 }
 
 // TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
-// take one parameter "ref", which is generated by the fast path.
+// take one parameter "ref", which is given by the fast path.
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj, uint32_t offset) {
   DCHECK(kUseReadBarrier);