Merge "MIPS: Follow-up to hash-based DexCache methods array"
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 974e876..59a2c10 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -165,13 +165,29 @@
 .endm
 
     /*
+     * Individually usable part of macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY.
+     */
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    sw      $s8, 104($sp)
+    .cfi_rel_offset 30, 104
+    sw      $s7, 96($sp)
+    .cfi_rel_offset 23, 96
+    sw      $s6, 92($sp)
+    .cfi_rel_offset 22, 92
+    sw      $s5, 88($sp)
+    .cfi_rel_offset 21, 88
+    sw      $s4, 84($sp)
+    .cfi_rel_offset 20, 84
+.endm
+
+    /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
      *              (26 total + 1 word padding + method*)
      */
-.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
-    addiu  $sp, $sp, -112
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY save_s4_thru_s8=1
+    addiu   $sp, $sp, -112
     .cfi_adjust_cfa_offset 112
 
     // Ugly compile-time check, but we only have the preprocessor.
@@ -179,40 +195,33 @@
 #error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
 #endif
 
-    sw     $ra, 108($sp)
+    sw      $ra, 108($sp)
     .cfi_rel_offset 31, 108
-    sw     $s8, 104($sp)
-    .cfi_rel_offset 30, 104
-    sw     $gp, 100($sp)
+    sw      $gp, 100($sp)
     .cfi_rel_offset 28, 100
-    sw     $s7, 96($sp)
-    .cfi_rel_offset 23, 96
-    sw     $s6, 92($sp)
-    .cfi_rel_offset 22, 92
-    sw     $s5, 88($sp)
-    .cfi_rel_offset 21, 88
-    sw     $s4, 84($sp)
-    .cfi_rel_offset 20, 84
-    sw     $s3, 80($sp)
+    .if \save_s4_thru_s8
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    .endif
+    sw      $s3, 80($sp)
     .cfi_rel_offset 19, 80
-    sw     $s2, 76($sp)
+    sw      $s2, 76($sp)
     .cfi_rel_offset 18, 76
-    sw     $t1, 72($sp)
+    sw      $t1, 72($sp)
     .cfi_rel_offset 9, 72
-    sw     $t0, 68($sp)
+    sw      $t0, 68($sp)
     .cfi_rel_offset 8, 68
-    sw     $a3, 64($sp)
+    sw      $a3, 64($sp)
     .cfi_rel_offset 7, 64
-    sw     $a2, 60($sp)
+    sw      $a2, 60($sp)
     .cfi_rel_offset 6, 60
-    sw     $a1, 56($sp)
+    sw      $a1, 56($sp)
     .cfi_rel_offset 5, 56
-    SDu $f18, $f19, 48, $sp, $t8
-    SDu $f16, $f17, 40, $sp, $t8
-    SDu $f14, $f15, 32, $sp, $t8
-    SDu $f12, $f13, 24, $sp, $t8
-    SDu $f10, $f11, 16, $sp, $t8
-    SDu $f8, $f9, 8, $sp, $t8
+    SDu     $f18, $f19, 48, $sp, $t8
+    SDu     $f16, $f17, 40, $sp, $t8
+    SDu     $f14, $f15, 32, $sp, $t8
+    SDu     $f12, $f13, 24, $sp, $t8
+    SDu     $f10, $f11, 16, $sp, $t8
+    SDu     $f8, $f9, 8, $sp, $t8
     # bottom will hold Method*
 .endm
 
@@ -225,8 +234,14 @@
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
      * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
-    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME save_s4_thru_s8_only=0
+    .if \save_s4_thru_s8_only
+      // It is expected that `SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY /* save_s4_thru_s8 */ 0`
+      // has been done prior to `SETUP_SAVE_REFS_AND_ARGS_FRAME /* save_s4_thru_s8_only */ 1`.
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    .else
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    .endif
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
     lw $t0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t0)
@@ -254,44 +269,64 @@
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
-    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
-    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
-    lw     $ra, 108($sp)
-    .cfi_restore 31
-    lw     $s8, 104($sp)
-    .cfi_restore 30
-    lw     $gp, 100($sp)
+    /*
+     * Individually usable part of macro RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+     */
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP
+    lw      $gp, 100($sp)
     .cfi_restore 28
-    lw     $s7, 96($sp)
-    .cfi_restore 23
-    lw     $s6, 92($sp)
-    .cfi_restore 22
-    lw     $s5, 88($sp)
-    .cfi_restore 21
-    lw     $s4, 84($sp)
-    .cfi_restore 20
-    lw     $s3, 80($sp)
-    .cfi_restore 19
-    lw     $s2, 76($sp)
-    .cfi_restore 18
-    lw     $t1, 72($sp)
-    .cfi_restore 9
-    lw     $t0, 68($sp)
-    .cfi_restore 8
-    lw     $a3, 64($sp)
-    .cfi_restore 7
-    lw     $a2, 60($sp)
-    .cfi_restore 6
-    lw     $a1, 56($sp)
+.endm
+
+    /*
+     * Individually usable part of macro RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+     */
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
+    lw      $a1, 56($sp)
     .cfi_restore 5
-    LDu $f18, $f19, 48, $sp, $t8
-    LDu $f16, $f17, 40, $sp, $t8
-    LDu $f14, $f15, 32, $sp, $t8
-    LDu $f12, $f13, 24, $sp, $t8
-    LDu $f10, $f11, 16, $sp, $t8
-    LDu $f8, $f9, 8, $sp, $t8
-    addiu  $sp, $sp, 112          # pop frame
+.endm
+
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME restore_s4_thru_s8=1, remove_arg_slots=1
+    .if \remove_arg_slots
+      addiu $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
+      .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+    .endif
+    lw      $ra, 108($sp)
+    .cfi_restore 31
+    .if \restore_s4_thru_s8
+      lw    $s8, 104($sp)
+      .cfi_restore 30
+    .endif
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP
+    .if \restore_s4_thru_s8
+      lw    $s7, 96($sp)
+      .cfi_restore 23
+      lw    $s6, 92($sp)
+      .cfi_restore 22
+      lw    $s5, 88($sp)
+      .cfi_restore 21
+      lw    $s4, 84($sp)
+      .cfi_restore 20
+    .endif
+    lw      $s3, 80($sp)
+    .cfi_restore 19
+    lw      $s2, 76($sp)
+    .cfi_restore 18
+    lw      $t1, 72($sp)
+    .cfi_restore 9
+    lw      $t0, 68($sp)
+    .cfi_restore 8
+    lw      $a3, 64($sp)
+    .cfi_restore 7
+    lw      $a2, 60($sp)
+    .cfi_restore 6
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
+    LDu     $f18, $f19, 48, $sp, $t8
+    LDu     $f16, $f17, 40, $sp, $t8
+    LDu     $f14, $f15, 32, $sp, $t8
+    LDu     $f12, $f13, 24, $sp, $t8
+    LDu     $f10, $f11, 16, $sp, $t8
+    LDu     $f8, $f9, 8, $sp, $t8
+    addiu   $sp, $sp, 112                           # Pop frame.
     .cfi_adjust_cfa_offset -112
 .endm
 
@@ -826,9 +861,10 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
      * pointing back to the original caller.
      */
-.macro INVOKE_TRAMPOLINE_BODY cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name, save_s4_thru_s8_only=0
     .extern \cxx_name
-    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME \save_s4_thru_s8_only  # save callee saves in case
+                                                          # allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     la    $t9, \cxx_name
     jalr  $t9                              # (method_idx, this, Thread*, $sp)
@@ -2063,46 +2099,83 @@
      * a0 is the conflict ArtMethod.
      * t7 is a hidden argument that holds the target interface method's dex method index.
      *
-     * Note that this stub writes to a0, t7 and t8.
+     * Note that this stub writes to v0-v1, a0, t2-t9, f0-f7.
      */
+    .extern artLookupResolvedMethod
+    .extern __atomic_load_8         # For int64_t std::atomic::load(std::memory_order).
 ENTRY art_quick_imt_conflict_trampoline
-// FIXME: The DexCache method array has been changed to hash-based cache with eviction.
-// We need a relaxed atomic load of a 64-bit location to try and load the method
-// and call artQuickResolutionTrampoline() if the index does not match.
-#if 0
-    lw      $t8, 0($sp)                                      # Load referrer.
-    lw      $t8, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t8) # Load dex cache methods array.
-    sll     $t7, $t7, POINTER_SIZE_SHIFT                     # Calculate offset.
-    addu    $t7, $t8, $t7                                    # Add offset to base.
-    lw      $t7, 0($t7)                                      # Load interface method.
-    lw      $a0, ART_METHOD_JNI_OFFSET_32($a0)               # Load ImtConflictTable.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY /* save_s4_thru_s8 */ 0
+
+    lw      $t8, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $t8 = referrer.
+    la      $t9, __atomic_load_8
+    addiu   $sp, $sp, -ARG_SLOT_SIZE                # Reserve argument slots on the stack.
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+    lw      $t8, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t8)  # $t8 = dex cache methods array.
+
+    move    $s2, $t7                                # $s2 = method index (callee-saved).
+    lw      $s3, ART_METHOD_JNI_OFFSET_32($a0)      # $s3 = ImtConflictTable (callee-saved).
+
+    sll     $t7, $t7, 32 - METHOD_DEX_CACHE_HASH_BITS  # $t7 = slot index in top bits, zeroes below.
+    srl     $t7, $t7, 32 - METHOD_DEX_CACHE_HASH_BITS - (POINTER_SIZE_SHIFT + 1)
+                                                    # $t7 = slot offset.
+
+    li      $a1, STD_MEMORY_ORDER_RELAXED           # $a1 = std::memory_order_relaxed.
+    jalr    $t9                                     # [$v0, $v1] = __atomic_load_8($a0, $a1).
+    addu    $a0, $t8, $t7                           # $a0 = DexCache method slot address.
+
+    bne     $v1, $s2, .Limt_conflict_trampoline_dex_cache_miss  # Branch if method index miss.
+    addiu   $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
 
 .Limt_table_iterate:
-    lw      $t8, 0($a0)                                      # Load next entry in ImtConflictTable.
+    lw      $t8, 0($s3)                             # Load next entry in ImtConflictTable.
     # Branch if found.
-    beq     $t8, $t7, .Limt_table_found
+    beq     $t8, $v0, .Limt_table_found
     nop
     # If the entry is null, the interface method is not in the ImtConflictTable.
     beqz    $t8, .Lconflict_trampoline
     nop
     # Iterate over the entries of the ImtConflictTable.
     b       .Limt_table_iterate
-    addiu   $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+    addiu   $s3, $s3, 2 * __SIZEOF_POINTER__        # Iterate to the next entry.
 
 .Limt_table_found:
     # We successfully hit an entry in the table. Load the target method and jump to it.
-    lw      $a0, __SIZEOF_POINTER__($a0)
+    .cfi_remember_state
+    lw      $a0, __SIZEOF_POINTER__($s3)
     lw      $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME /* restore_s4_thru_s8 */ 0, /* remove_arg_slots */ 0
     jalr    $zero, $t9
     nop
+    .cfi_restore_state
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
-    move    $a0, $t7                                         # Load interface method.
-#else
-    move   $a0, $zero
-#endif
-    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
+    .cfi_remember_state
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP             # Restore clobbered $gp.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1             # Restore this.
+    move    $a0, $v0                                # Load interface method.
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline, /* save_s4_thru_s8_only */ 1
+    .cfi_restore_state
+
+.Limt_conflict_trampoline_dex_cache_miss:
+    # We're not creating a proper runtime method frame here;
+    # artLookupResolvedMethod() is not allowed to walk the stack.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP             # Restore clobbered $gp.
+    lw      $a1, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $a1 = referrer.
+    la      $t9, artLookupResolvedMethod
+    addiu   $sp, $sp, -ARG_SLOT_SIZE                # Reserve argument slots on the stack.
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+    jalr    $t9                                     # (uint32_t method_index, ArtMethod* referrer).
+    move    $a0, $s2                                # $a0 = method index.
+
+    # If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
+    beqz    $v0, .Lconflict_trampoline
+    addiu   $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+
+    b       .Limt_table_iterate
+    nop
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
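
The new 32-bit fast path above is easier to follow in C++. Below is a minimal
sketch of what the stub computes around the __atomic_load_8 call, assuming an
illustrative {method, index} slot layout and a stand-in value for
METHOD_DEX_CACHE_HASH_BITS (neither is the exact ART definition):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    // Hypothetical slot layout: 32-bit method pointer + 32-bit dex method
    // index, packed into one 64-bit location that is loaded atomically.
    struct MethodSlot32 {
      uint32_t method;
      uint32_t index;
    };

    constexpr uint32_t kHashBits = 10;  // Stand-in for METHOD_DEX_CACHE_HASH_BITS.

    // Equivalent of the sll/srl pair: keep the low kHashBits of the method
    // index, then scale by the 8-byte slot size (POINTER_SIZE_SHIFT + 1).
    inline uint32_t SlotOffset(uint32_t method_index) {
      return (method_index & ((1u << kHashBits) - 1u)) << 3;
    }

    // One relaxed 64-bit load of the whole slot (the stub's __atomic_load_8
    // call), then an index comparison (the stub's bne against $s2).
    uint32_t LookupMethod(const uint8_t* cache_base, uint32_t method_index) {
      auto* slot = reinterpret_cast<const std::atomic<uint64_t>*>(
          cache_base + SlotOffset(method_index));
      uint64_t raw = slot->load(std::memory_order_relaxed);
      MethodSlot32 pair;
      std::memcpy(&pair, &raw, sizeof(pair));
      // On a mismatch the stub falls back to artLookupResolvedMethod().
      return pair.index == method_index ? pair.method : 0u;
    }
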
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index bcb315f..3b92daa 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -188,7 +188,23 @@
 // This assumes the top part of these stack frame types are identical.
 #define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
 
-.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
+    /*
+     * Individually usable part of macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL.
+     */
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    sd      $s8, 192($sp)
+    .cfi_rel_offset 30, 192
+    sd      $s7, 176($sp)
+    .cfi_rel_offset 23, 176
+    sd      $s6, 168($sp)
+    .cfi_rel_offset 22, 168
+    sd      $s5, 160($sp)
+    .cfi_rel_offset 21, 160
+    sd      $s4, 152($sp)
+    .cfi_rel_offset 20, 152
+.endm
+
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL save_s4_thru_s8=1
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
 
@@ -197,48 +213,40 @@
 #error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS64) size not as expected."
 #endif
 
-    sd     $ra, 200($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_LrOffset
+    sd      $ra, 200($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_LrOffset
     .cfi_rel_offset 31, 200
-    sd     $s8, 192($sp)
-    .cfi_rel_offset 30, 192
-    sd     $t8, 184($sp)           # t8 holds caller's gp, now save it to the stack.
-    .cfi_rel_offset 28, 184        # Value from gp is pushed, so set the cfi offset accordingly.
-    sd     $s7, 176($sp)
-    .cfi_rel_offset 23, 176
-    sd     $s6, 168($sp)
-    .cfi_rel_offset 22, 168
-    sd     $s5, 160($sp)
-    .cfi_rel_offset 21, 160
-    sd     $s4, 152($sp)
-    .cfi_rel_offset 20, 152
-    sd     $s3, 144($sp)
+    sd      $t8, 184($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 184         # Value from gp is pushed, so set the cfi offset accordingly.
+    .if \save_s4_thru_s8
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    .endif
+    sd      $s3, 144($sp)
     .cfi_rel_offset 19, 144
-    sd     $s2, 136($sp)
+    sd      $s2, 136($sp)
     .cfi_rel_offset 18, 136
-
-    sd     $a7, 128($sp)
+    sd      $a7, 128($sp)
     .cfi_rel_offset 11, 128
-    sd     $a6, 120($sp)
+    sd      $a6, 120($sp)
     .cfi_rel_offset 10, 120
-    sd     $a5, 112($sp)
+    sd      $a5, 112($sp)
     .cfi_rel_offset 9, 112
-    sd     $a4, 104($sp)
+    sd      $a4, 104($sp)
     .cfi_rel_offset 8, 104
-    sd     $a3,  96($sp)
+    sd      $a3,  96($sp)
     .cfi_rel_offset 7, 96
-    sd     $a2,  88($sp)
+    sd      $a2,  88($sp)
     .cfi_rel_offset 6, 88
-    sd     $a1,  80($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset
+    sd      $a1,  80($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset
     .cfi_rel_offset 5, 80
 
-    s.d    $f19, 72($sp)
-    s.d    $f18, 64($sp)
-    s.d    $f17, 56($sp)
-    s.d    $f16, 48($sp)
-    s.d    $f15, 40($sp)
-    s.d    $f14, 32($sp)
-    s.d    $f13, 24($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset
-    s.d    $f12, 16($sp)           # This isn't necessary to store.
+    s.d     $f19, 72($sp)
+    s.d     $f18, 64($sp)
+    s.d     $f17, 56($sp)
+    s.d     $f16, 48($sp)
+    s.d     $f15, 40($sp)
+    s.d     $f14, 32($sp)
+    s.d     $f13, 24($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset
+    s.d     $f12, 16($sp)           # This isn't necessary to store.
     # 1x8 bytes padding + Method*
 .endm
 
@@ -248,8 +256,14 @@
      * non-moving GC.
      * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method*
      */
-.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
-    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME save_s4_thru_s8_only=0
+    .if \save_s4_thru_s8_only
+      // It is expected that `SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL /* save_s4_thru_s8 */ 0`
+      // has been done prior to `SETUP_SAVE_REFS_AND_ARGS_FRAME /* save_s4_thru_s8_only */ 1`.
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
+    .else
+      SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
+    .endif
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
@@ -264,52 +278,62 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
-    ld     $ra, 200($sp)
-    .cfi_restore 31
-    ld     $s8, 192($sp)
-    .cfi_restore 30
-    ld     $t8, 184($sp)           # Restore gp back to it's temp storage.
-    .cfi_restore 28
-    ld     $s7, 176($sp)
-    .cfi_restore 23
-    ld     $s6, 168($sp)
-    .cfi_restore 22
-    ld     $s5, 160($sp)
-    .cfi_restore 21
-    ld     $s4, 152($sp)
-    .cfi_restore 20
-    ld     $s3, 144($sp)
-    .cfi_restore 19
-    ld     $s2, 136($sp)
-    .cfi_restore 18
-
-    ld     $a7, 128($sp)
-    .cfi_restore 11
-    ld     $a6, 120($sp)
-    .cfi_restore 10
-    ld     $a5, 112($sp)
-    .cfi_restore 9
-    ld     $a4, 104($sp)
-    .cfi_restore 8
-    ld     $a3,  96($sp)
-    .cfi_restore 7
-    ld     $a2,  88($sp)
-    .cfi_restore 6
-    ld     $a1,  80($sp)
+    /*
+     * Individually usable part of macro RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+     */
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
+    ld      $a1,  80($sp)
     .cfi_restore 5
+.endm
 
-    l.d    $f19, 72($sp)
-    l.d    $f18, 64($sp)
-    l.d    $f17, 56($sp)
-    l.d    $f16, 48($sp)
-    l.d    $f15, 40($sp)
-    l.d    $f14, 32($sp)
-    l.d    $f13, 24($sp)
-    l.d    $f12, 16($sp)
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME restore_s4_thru_s8=1
+    ld      $ra, 200($sp)
+    .cfi_restore 31
+    .if \restore_s4_thru_s8
+      ld    $s8, 192($sp)
+      .cfi_restore 30
+    .endif
+    ld      $t8, 184($sp)           # Restore gp back to its temp storage.
+    .cfi_restore 28
+    .if \restore_s4_thru_s8
+      ld    $s7, 176($sp)
+      .cfi_restore 23
+      ld    $s6, 168($sp)
+      .cfi_restore 22
+      ld    $s5, 160($sp)
+      .cfi_restore 21
+      ld    $s4, 152($sp)
+      .cfi_restore 20
+    .endif
+    ld      $s3, 144($sp)
+    .cfi_restore 19
+    ld      $s2, 136($sp)
+    .cfi_restore 18
+    ld      $a7, 128($sp)
+    .cfi_restore 11
+    ld      $a6, 120($sp)
+    .cfi_restore 10
+    ld      $a5, 112($sp)
+    .cfi_restore 9
+    ld      $a4, 104($sp)
+    .cfi_restore 8
+    ld      $a3,  96($sp)
+    .cfi_restore 7
+    ld      $a2,  88($sp)
+    .cfi_restore 6
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
+
+    l.d     $f19, 72($sp)
+    l.d     $f18, 64($sp)
+    l.d     $f17, 56($sp)
+    l.d     $f16, 48($sp)
+    l.d     $f15, 40($sp)
+    l.d     $f14, 32($sp)
+    l.d     $f13, 24($sp)
+    l.d     $f12, 16($sp)
 
     .cpreturn
-    daddiu $sp, $sp, 208
+    daddiu  $sp, $sp, 208
     .cfi_adjust_cfa_offset -208
 .endm
 
@@ -913,9 +937,10 @@
      * On success this wrapper will restore arguments and *jump* to the target, leaving the ra
      * pointing back to the original caller.
      */
-.macro INVOKE_TRAMPOLINE_BODY cxx_name
+.macro INVOKE_TRAMPOLINE_BODY cxx_name, save_s4_thru_s8_only=0
     .extern \cxx_name
-    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME \save_s4_thru_s8_only  # save callee saves in case
+                                                          # allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
     move  $a3, $sp                         # pass $sp
@@ -1986,45 +2011,69 @@
      * a0 is the conflict ArtMethod.
      * t0 is a hidden argument that holds the target interface method's dex method index.
      *
-     * Mote that this stub writes to a0, t0 and t1.
+     * Note that this stub writes to v0-v1, a0, t0-t3, t8-t9, f0-f11, f20-f23.
      */
+    .extern artLookupResolvedMethod
+    .extern __atomic_load_16        # For __int128_t std::atomic::load(std::memory_order).
 ENTRY art_quick_imt_conflict_trampoline
-// FIXME: The DexCache method array has been changed to hash-based cache with eviction.
-// We need a relaxed atomic load of a 128-bit location to try and load the method
-// and call artQuickResolutionTrampoline() if the index does not match.
-#if 0
-    ld      $t1, 0($sp)                                      # Load referrer.
-    ld      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1) # Load dex cache methods array.
-    dsll    $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
-    daddu   $t0, $t1, $t0                                    # Add offset to base.
-    ld      $t0, 0($t0)                                      # Load interface method.
-    ld      $a0, ART_METHOD_JNI_OFFSET_64($a0)               # Load ImtConflictTable.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL /* save_s4_thru_s8 */ 0
+
+    ld      $t1, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $t1 = referrer.
+    dla     $t9, __atomic_load_16
+    ld      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1)  # $t1 = dex cache methods array.
+
+    dext    $s2, $t0, 0, 32                         # $s2 = zero-extended method index
+                                                    # (callee-saved).
+    ld      $s3, ART_METHOD_JNI_OFFSET_64($a0)      # $s3 = ImtConflictTable (callee-saved).
+
+    dext    $t0, $t0, 0, METHOD_DEX_CACHE_HASH_BITS  # $t0 = slot index.
+
+    li      $a1, STD_MEMORY_ORDER_RELAXED           # $a1 = std::memory_order_relaxed.
+    jalr    $t9                                     # [$v0, $v1] = __atomic_load_16($a0, $a1).
+    dlsa    $a0, $t0, $t1, POINTER_SIZE_SHIFT + 1   # $a0 = DexCache method slot address.
+
+    bnec    $v1, $s2, .Limt_conflict_trampoline_dex_cache_miss  # Branch if method index miss.
 
 .Limt_table_iterate:
-    ld      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    ld      $t1, 0($s3)                             # Load next entry in ImtConflictTable.
     # Branch if found.
-    beq     $t1, $t0, .Limt_table_found
+    beq     $t1, $v0, .Limt_table_found
     nop
     # If the entry is null, the interface method is not in the ImtConflictTable.
     beqzc   $t1, .Lconflict_trampoline
     # Iterate over the entries of the ImtConflictTable.
-    daddiu  $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
-    bc       .Limt_table_iterate
+    daddiu  $s3, $s3, 2 * __SIZEOF_POINTER__        # Iterate to the next entry.
+    bc      .Limt_table_iterate
 
 .Limt_table_found:
     # We successfully hit an entry in the table. Load the target method and jump to it.
-    ld      $a0, __SIZEOF_POINTER__($a0)
+    .cfi_remember_state
+    ld      $a0, __SIZEOF_POINTER__($s3)
     ld      $t9, ART_METHOD_QUICK_CODE_OFFSET_64($a0)
-    jr      $t9
-    .cpreturn                      # Restore gp from t8 in branch delay slot.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME /* restore_s4_thru_s8 */ 0
+    jic     $t9, 0
+    .cfi_restore_state
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
-    move   $a0, $t0                                          # Load interface method.
-#else
-    move   $a0, $zero
-#endif
-    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
+    .cfi_remember_state
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1             # Restore this.
+    move    $a0, $v0                                # Load interface method.
+    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline, /* save_s4_thru_s8_only */ 1
+    .cfi_restore_state
+
+.Limt_conflict_trampoline_dex_cache_miss:
+    # We're not creating a proper runtime method frame here;
+    # artLookupResolvedMethod() is not allowed to walk the stack.
+    dla     $t9, artLookupResolvedMethod
+    ld      $a1, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $a1 = referrer.
+    jalr    $t9                                     # (uint32_t method_index, ArtMethod* referrer).
+    sll     $a0, $s2, 0                             # $a0 = sign-extended method index.
+
+    # If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
+    beqzc   $v0, .Lconflict_trampoline
+    nop
+    bc      .Limt_table_iterate
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
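
The 64-bit variant differs mainly in the width of the slot: it is a 16-byte
{pointer, index} pair loaded with a single call into libatomic. A hedged C++
equivalent (the struct is illustrative, not the real ART type):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    // Hypothetical 16-byte slot: the method pointer comes back in $v0 and
    // the index in $v1 when the stub calls __atomic_load_16 directly.
    struct MethodSlot64 {
      uint64_t method;
      uint64_t index;
    };

    uint64_t LookupMethod64(const std::atomic<unsigned __int128>* slot,
                            uint64_t method_index) {
      // Relaxed 16-byte load; on mips64 this lowers to the same libatomic
      // __atomic_load_16 routine the assembly invokes by hand.
      unsigned __int128 raw = slot->load(std::memory_order_relaxed);
      MethodSlot64 pair;
      std::memcpy(&pair, &raw, sizeof(pair));
      return pair.index == method_index ? pair.method : 0u;  // Miss -> resolve.
    }
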
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index acfd889..11b3abb 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -114,6 +114,8 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
 #define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
+#define STD_MEMORY_ORDER_RELAXED 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(STD_MEMORY_ORDER_RELAXED), (static_cast<int32_t>(std::memory_order_relaxed)))
 #define OBJECT_ALIGNMENT_MASK 0x7
 DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
 #define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
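
STD_MEMORY_ORDER_RELAXED exists so the assembly stubs can pass the memory
order to libatomic as a literal immediate (li $a1, STD_MEMORY_ORDER_RELAXED).
The generated DEFINE_CHECK_EQ above pins it to the C++ enum; a stand-alone
form of the same compile-time check:

    #include <atomic>

    // If std::memory_order_relaxed ever stopped converting to 0, the
    // hand-written `li $a1, STD_MEMORY_ORDER_RELAXED` would load the wrong
    // value.
    static_assert(static_cast<int>(std::memory_order_relaxed) == 0,
                  "assembly passes std::memory_order_relaxed as immediate 0");
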
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 7b18a4c..1a9f999 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -219,7 +219,7 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
 }
 
-#if !defined(__aarch64__) && !defined(__x86_64__)
+#if !defined(__aarch64__) && !defined(__x86_64__) && !defined(__mips__)
 static pthread_mutex_t dex_cache_slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 DexCache::ConversionPair64 DexCache::AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target) {
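
For context, the mutex-guarded routines that mips now skips look roughly like
the sketch below (simplified from the surrounding dex_cache.cc code, not the
exact implementation): every wide load funnels through one process-wide lock,
which is what switching MIPS to real atomic loads avoids.

    #include <cstdint>
    #include <pthread.h>

    struct ConversionPair64 { uint64_t first, second; };

    static pthread_mutex_t dex_cache_slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;

    // Simplified shape of the fallback: a plain 16-byte load under a global
    // lock.
    ConversionPair64 SlowAtomicLoadRelaxed16B(ConversionPair64* target) {
      pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
      ConversionPair64 value = *target;
      pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
      return value;
    }
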
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 7fd5dd1..f75786b 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -482,8 +482,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
   // Due to lack of 16-byte atomics support, we use hand-crafted routines.
-#if defined(__aarch64__)
-  // 16-byte atomics are supported on aarch64.
+#if defined(__aarch64__) || defined(__mips__)
+  // 16-byte atomics are supported on aarch64, mips and mips64.
   ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
       std::atomic<ConversionPair64>* target) {
     return target->load(std::memory_order_relaxed);
diff --git a/tools/cpp-define-generator/constant_globals.def b/tools/cpp-define-generator/constant_globals.def
index a3ccc72..dbaf33c 100644
--- a/tools/cpp-define-generator/constant_globals.def
+++ b/tools/cpp-define-generator/constant_globals.def
@@ -17,9 +17,12 @@
 // Export global values.
 
 #if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include <atomic>            // std::memory_order_relaxed
 #include "globals.h"         // art::kObjectAlignment
 #endif
 
+DEFINE_EXPR(STD_MEMORY_ORDER_RELAXED, int32_t, std::memory_order_relaxed)
+
 #define DEFINE_OBJECT_EXPR(macro_name, type, constant_field_name) \
   DEFINE_EXPR(OBJECT_ ## macro_name, type, constant_field_name)