ART: Compiler support for invoke-polymorphic.

Adds basic support for invoking method handles from compiled code.

Enables method verification for methods containing invoke-polymorphic.

Adds k45cc/k4rcc output to Instruction::DumpString(), which was
found to be missing when enabling verification.

Includes stack traces in test 957-methodhandle-transforms failures
so they can be identified more easily.

Bug: 30550796,33191393
Test: art/test/run-test 953
Test: m test-art-run-test
Change-Id: Ic9a96ea24906087597d96ad8159a5bc349d06950
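
The architecture-specific stubs below all follow the same dispatch scheme:
artInvokePolymorphic() writes the call's result into a stack-allocated JValue
and returns the shorty character of the method handle's return type; the stub
subtracts 'A' from that character and uses it to index a 26-entry handler
table whose entries move the value into the proper return register(s). The
C++ sketch below illustrates that table-driven dispatch. It is illustrative
only -- the names Handler, kHandlerTable and DispatchOnShorty are not part of
this change, and the object ('L') case is pointer-sized in the real stubs.

    // What a stub does with the JValue result for a given return type.
    enum class Handler { kVoid, kInt, kLong, kFloat, kDouble, kBoolean, kChar };

    // 26-entry table indexed by (shorty char - 'A'). Letters that are not
    // valid shorty return types fall back to the void/clean-up handler,
    // mirroring .Lcleanup_and_return in the assembly stubs.
    constexpr Handler kHandlerTable[26] = {
        Handler::kVoid,     // A
        Handler::kInt,      // B (byte)
        Handler::kChar,     // C (char)
        Handler::kDouble,   // D (double)
        Handler::kVoid,     // E
        Handler::kFloat,    // F (float)
        Handler::kVoid,     // G
        Handler::kVoid,     // H
        Handler::kInt,      // I (int)
        Handler::kLong,     // J (long)
        Handler::kVoid,     // K
        Handler::kLong,     // L (object; int-sized on 32-bit targets)
        Handler::kVoid,     // M
        Handler::kVoid,     // N
        Handler::kVoid,     // O
        Handler::kVoid,     // P
        Handler::kVoid,     // Q
        Handler::kVoid,     // R
        Handler::kInt,      // S (short)
        Handler::kVoid,     // T
        Handler::kVoid,     // U
        Handler::kVoid,     // V (void)
        Handler::kVoid,     // W
        Handler::kVoid,     // X
        Handler::kVoid,     // Y
        Handler::kBoolean,  // Z (boolean)
    };

    // Mirrors the stubs' bounds check: anything outside 'A'..'Z' takes the
    // clean-up path; everything else indexes the handler table.
    Handler DispatchOnShorty(char shorty) {
      unsigned index = static_cast<unsigned>(shorty) - 'A';
      if (index > static_cast<unsigned>('Z' - 'A')) {
        return Handler::kVoid;
      }
      return kHandlerTable[index];
    }

In the assembly stubs the table entries are byte offsets to local labels
rather than enum values, which keeps each entry to a single byte and the
table position-independent.
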
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 4d4ebdc..9e82f01 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2010,3 +2010,83 @@
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
+
+.extern artInvokePolymorphic
+ENTRY art_quick_invoke_polymorphic
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
+    mov     r2, r9                 @ pass Thread::Current
+    mov     r3, sp                 @ pass SP
+    mov     r0, #0                 @ initialize 64-bit JValue as zero.
+    str     r0, [sp, #-4]!
+    .cfi_adjust_cfa_offset 4
+    str     r0, [sp, #-4]!
+    .cfi_adjust_cfa_offset 4
+    mov     r0, sp                 @ pass JValue for return result as first argument.
+    bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
+    sub     r0, 'A'                @ return value is descriptor of handle's return type.
+    cmp     r0, 'Z' - 'A'          @ check if value is in bounds of handler table
+    bgt     .Lcleanup_and_return   @ and clean-up if not.
+    adr     r1, .Lhandler_table
+    tbb     [r1, r0]               @ branch to handler for return value based on return type.
+
+.Lstart_of_handlers:
+.Lstore_boolean_result:
+    ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
+    b       .Lcleanup_and_return
+.Lstore_char_result:
+    ldrh    r0, [sp]               @ Copy char value to return value of this function.
+    b       .Lcleanup_and_return
+.Lstore_float_result:
+    vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
+    vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    b       .Lcleanup_and_return
+.Lstore_double_result:
+    vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
+    vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    b       .Lcleanup_and_return
+.Lstore_long_result:
+    ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
+    str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    // Fall-through for lower bits.
+.Lstore_int_result:
+    ldr     r0, [sp]               @ Copy int value to return value of this function.
+    // Fall-through to clean up and return.
+.Lcleanup_and_return:
+    add     sp, #8
+    .cfi_adjust_cfa_offset -8
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
+
+.macro HANDLER_TABLE_OFFSET handler_label
+    .byte (\handler_label - .Lstart_of_handlers) / 2
+.endm
+
+.Lhandler_table:
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
+    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // B (byte)
+    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
+    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
+    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
+    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // I (int)
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
+    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // L (object)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
+    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // S (short)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
+    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
+.purgem HANDLER_TABLE_OFFSET
+END art_quick_invoke_polymorphic
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b1e038..eba0c87 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2567,3 +2567,82 @@
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
+
+.extern artInvokePolymorphic
+ENTRY art_quick_invoke_polymorphic
+    SETUP_SAVE_REFS_AND_ARGS_FRAME                // Save callee saves in case allocation triggers GC.
+    mov     x2, xSELF
+    mov     x3, sp
+    INCREASE_FRAME 16                             // Reserve space for JValue result.
+    str     xzr, [sp, #0]                         // Initialize result to zero.
+    mov     x0, sp                                // Set r0 to point to result.
+    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, Thread*, SP)
+    uxtb    w0, w0                                // Result is the return type descriptor as a char.
+    sub     w0, w0, 'A'                           // Convert to zero based index.
+    cmp     w0, 'Z' - 'A'
+    bhi     .Lcleanup_and_return                  // Clean-up if out-of-bounds.
+    adrp    x1, .Lhandler_table                   // Compute address of handler table.
+    add     x1, x1, :lo12:.Lhandler_table
+    ldrb    w0, [x1, w0, uxtw]                    // Lookup handler offset in handler table.
+    adr     x1, .Lstart_of_handlers
+    add     x0, x1, w0, sxtb #2                   // Convert relative offset to absolute address.
+    br      x0                                    // Branch to handler.
+
+.Lstart_of_handlers:
+.Lstore_boolean_result:
+    ldrb    w0, [sp]
+    b       .Lcleanup_and_return
+.Lstore_char_result:
+    ldrh    w0, [sp]
+    b       .Lcleanup_and_return
+.Lstore_float_result:
+    ldr     s0, [sp]
+    str     s0, [sp, #32]
+    b       .Lcleanup_and_return
+.Lstore_double_result:
+    ldr     d0, [sp]
+    str     d0, [sp, #32]
+    b       .Lcleanup_and_return
+.Lstore_long_result:
+    ldr     x0, [sp]
+    // Fall-through
+.Lcleanup_and_return:
+    DECREASE_FRAME 16
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+    .section    .rodata                           // Place handler table in read-only section away from text.
+    .align  2
+.macro HANDLER_TABLE_OFFSET handler_label
+    .byte (\handler_label - .Lstart_of_handlers) / 4
+.endm
+.Lhandler_table:
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
+    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
+    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
+    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
+    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
+    .text
+
+END  art_quick_invoke_polymorphic
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 62c29cf..ea95bbc 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -468,7 +468,7 @@
      * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
      * of the target Method* in r0 and method->code_ in r1.
      *
-     * If unsuccessful, the helper will return null/null will bea pending exception in the
+     * If unsuccessful, the helper will return null/null and there will be a pending exception in the
      * thread and we branch to another stub to deliver it.
      *
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
@@ -2223,5 +2223,97 @@
     jmp *%ebx
 END_FUNCTION art_quick_osr_stub
 
+DEFINE_FUNCTION art_quick_invoke_polymorphic
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  ebx, ebx       // Save frame.
+    mov %esp, %edx                                 // Remember SP.
+    subl LITERAL(16), %esp                         // Make space for JValue result.
+    CFI_ADJUST_CFA_OFFSET(16)
+    movl LITERAL(0), (%esp)                        // Initialize result to zero.
+    movl LITERAL(0), 4(%esp)
+    mov %esp, %eax                                 // Store pointer to JValue result in eax.
+    PUSH edx                                       // pass SP
+    pushl %fs:THREAD_SELF_OFFSET                   // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx                                       // pass receiver (method handle)
+    PUSH eax                                       // pass pointer to JValue result
+    call SYMBOL(artInvokePolymorphic)              // (result, receiver, Thread*, SP)
+    subl LITERAL('A'), %eax                        // Convert type descriptor character to a zero based index.
+    cmpb LITERAL('Z' - 'A'), %al                   // Eliminate out of bounds options.
+    ja .Lcleanup_and_return
+    movzbl %al, %eax
+    call .Lput_eip_in_ecx
+.Lbranch_start:
+    add $(.Lhandler_table - .Lbranch_start), %eax  // Add offset of handler table to index.
+    addl %ecx, %eax                                // Add EIP relative address of table.
+    movzbl (%eax), %eax                            // Lookup relative branch in table.
+    addl %ecx, %eax                                // Add EIP relative offset.
+    jmp *%eax                                      // Branch to handler.
+
+    // Handlers for different return types.
+.Lstore_boolean_result:
+    movzbl 16(%esp), %eax                          // Copy boolean result to the accumulator.
+    jmp .Lcleanup_and_return
+.Lstore_char_result:
+    movzwl 16(%esp), %eax                          // Copy char result to the accumulator.
+    jmp .Lcleanup_and_return
+.Lstore_float_result:
+    movd 16(%esp), %xmm0                           // Copy float result to the context restored by
+    movd %xmm0, 36(%esp)                           // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    jmp .Lcleanup_and_return
+.Lstore_double_result:
+    movsd 16(%esp), %xmm0                          // Copy double result to the context restored by
+    movsd %xmm0, 36(%esp)                          // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    jmp .Lcleanup_and_return
+.Lstore_long_result:
+    movl 20(%esp), %edx                            // Copy upper-word of result to the context restored by
+    movl %edx, 72(%esp)                            // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    // Fall-through for lower bits.
+.Lstore_int_result:
+    movl 16(%esp), %eax                            // Copy int result to the accumulator.
+    // Fall-through to clean up and return.
+.Lcleanup_and_return:
+    addl LITERAL(32), %esp                         // Pop arguments and stack allocated JValue result.
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    RETURN_OR_DELIVER_PENDING_EXCEPTION
+
+.Lput_eip_in_ecx:                                  // Internal function that puts the address of
+    movl 0(%esp), %ecx                             // the next instruction into ECX when called.
+    ret
+
+    // Handler table with byte offsets to the handler for each return type.
+.Lhandler_table:
+MACRO1(HANDLER_TABLE_ENTRY, handler_label)
+    .byte RAW_VAR(handler_label) - .Lbranch_start
+END_MACRO
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // A
+    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // B (byte)
+    HANDLER_TABLE_ENTRY(.Lstore_char_result)       // C (char)
+    HANDLER_TABLE_ENTRY(.Lstore_double_result)     // D (double)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // E
+    HANDLER_TABLE_ENTRY(.Lstore_float_result)      // F (float)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // G
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // H
+    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // I (int)
+    HANDLER_TABLE_ENTRY(.Lstore_long_result)       // J (long)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // K
+    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // L (object)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // M
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // N
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // O
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // P
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // Q
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // R
+    HANDLER_TABLE_ENTRY(.Lstore_int_result)        // S (short)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // T
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // U
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // V (void)
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // W
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // X
+    HANDLER_TABLE_ENTRY(.Lcleanup_and_return)      // Y
+    HANDLER_TABLE_ENTRY(.Lstore_boolean_result)    // Z (boolean)
+
+END_FUNCTION art_quick_invoke_polymorphic
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index facd563..5b3a314 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2394,3 +2394,78 @@
     rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
     jmp *%rdx
 END_FUNCTION art_quick_osr_stub
+
+DEFINE_FUNCTION art_quick_invoke_polymorphic
+    SETUP_SAVE_REFS_AND_ARGS_FRAME                // save callee saves
+    movq %gs:THREAD_SELF_OFFSET, %rdx             // pass Thread
+    movq %rsp, %rcx                               // pass SP
+    subq LITERAL(16), %rsp                        // make space for JValue result
+    CFI_ADJUST_CFA_OFFSET(16)
+    movq LITERAL(0), (%rsp)                       // initialize result
+    movq %rsp, %rdi                               // store pointer to JValue result
+    call SYMBOL(artInvokePolymorphic)             // artInvokePolymorphic(result, receiver, Thread*, SP)
+                                                  // rax now holds the shorty character of the return type.
+    subq LITERAL('A'), %rax                       // Convert type descriptor character value to a zero based index.
+    cmpb LITERAL('Z' - 'A'), %al                  // Eliminate out of bounds options
+    ja .Lcleanup_and_return
+    movzbq %al, %rax
+    leaq .Lhandler_table(%rip), %rcx              // Get the address of the handler table
+    movsbq (%rcx, %rax, 1), %rax                  // Lookup handler offset relative to table
+    addq %rcx, %rax                               // Add table address to yield handler address.
+    jmpq *%rax                                    // Jump to handler.
+
+.align 4
+.Lhandler_table:                                  // Table of type descriptor to handlers.
+MACRO1(HANDLER_TABLE_OFFSET, handle_label)
+    .byte RAW_VAR(handle_label) - .Lhandler_table
+END_MACRO
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
+    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
+    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
+    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
+    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
+    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
+    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
+
+.Lstore_boolean_result:
+    movzbq (%rsp), %rax                           // Copy boolean result to the accumulator
+    jmp .Lcleanup_and_return
+.Lstore_char_result:
+    movzwq (%rsp), %rax                           // Copy char result to the accumulator
+    jmp .Lcleanup_and_return
+.Lstore_float_result:
+    movd (%rsp), %xmm0                            // Copy float result to the context restored by
+    movd %xmm0, 32(%rsp)                          // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    jmp .Lcleanup_and_return
+.Lstore_double_result:
+    movsd (%rsp), %xmm0                           // Copy double result to the context restored by
+    movsd %xmm0, 32(%rsp)                         // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
+    jmp .Lcleanup_and_return
+.Lstore_long_result:
+    movq (%rsp), %rax                             // Copy long result to the accumulator.
+    // Fall-through
+.Lcleanup_and_return:
+    addq LITERAL(16), %rsp                        // Pop space for JValue result.
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    RETURN_OR_DELIVER_PENDING_EXCEPTION
+END_FUNCTION art_quick_invoke_polymorphic
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index bfdddf7..e4972da 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index c30272e..a44f79e 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -428,6 +428,8 @@
     case Instruction::INVOKE_VIRTUAL_RANGE:
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_POLYMORPHIC:
+    case Instruction::INVOKE_POLYMORPHIC_RANGE:
     case Instruction::INVOKE_VIRTUAL_QUICK:
     case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
       // Without inlining, we could just check that the offset is the class offset.
@@ -551,6 +553,12 @@
     case Instruction::INVOKE_INTERFACE_RANGE:
       ThrowNullPointerExceptionForMethodAccess(instr->VRegB_3rc(), kInterface);
       break;
+    case Instruction::INVOKE_POLYMORPHIC:
+      ThrowNullPointerExceptionForMethodAccess(instr->VRegB_45cc(), kVirtual);
+      break;
+    case Instruction::INVOKE_POLYMORPHIC_RANGE:
+      ThrowNullPointerExceptionForMethodAccess(instr->VRegB_4rcc(), kVirtual);
+      break;
     case Instruction::INVOKE_VIRTUAL_QUICK:
     case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
       // Since we replaced the method index, we ask the verifier to tell us which
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 7b8974f..37f3ac9 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -358,7 +358,7 @@
       }
       break;
     case k35c: {
-      uint32_t arg[5];
+      uint32_t arg[kMaxVarArgRegs];
       GetVarArgs(arg);
       switch (Opcode()) {
         case FILLED_NEW_ARRAY:
@@ -443,8 +443,50 @@
       }
       break;
     }
+    case k45cc: {
+      uint32_t arg[kMaxVarArgRegs];
+      GetVarArgs(arg);
+      uint32_t method_idx = VRegB_45cc();
+      uint32_t proto_idx = VRegH_45cc();
+      os << opcode << " {";
+      for (int i = 0; i < VRegA_45cc(); ++i) {
+        if (i != 0) {
+          os << ", ";
+        }
+        os << "v" << arg[i];
+      }
+      os << "}";
+      if (file != nullptr) {
+        os << ", " << file->PrettyMethod(method_idx) << ", " << file->GetShorty(proto_idx)
+           << " // ";
+      } else {
+        os << ", ";
+      }
+      os << "method@" << method_idx << ", proto@" << proto_idx;
+      break;
+    }
+    case k4rcc:
+      switch (Opcode()) {
+        case INVOKE_POLYMORPHIC_RANGE: {
+          if (file != nullptr) {
+            uint32_t method_idx = VRegB_4rcc();
+            uint32_t proto_idx = VRegH_4rcc();
+            os << opcode << ", {v" << VRegC_4rcc() << " .. v" << (VRegC_4rcc() + VRegA_4rcc() - 1)
+               << "}, " << file->PrettyMethod(method_idx) << ", " << file->GetShorty(proto_idx)
+               << " // method@" << method_idx << ", proto@" << proto_idx;
+            break;
+          }
+        }
+        FALLTHROUGH_INTENDED;
+        default: {
+          uint32_t method_idx = VRegB_4rcc();
+          uint32_t proto_idx = VRegH_4rcc();
+          os << opcode << ", {v" << VRegC_4rcc() << " .. v" << (VRegC_4rcc() + VRegA_4rcc() - 1)
+             << "}, method@" << method_idx << ", proto@" << proto_idx;
+        }
+      }
+      break;
     case k51l: os << StringPrintf("%s v%d, #%+" PRId64, opcode, VRegA_51l(), VRegB_51l()); break;
-    default: os << " unknown format (" << DumpHex(5) << ")"; break;
   }
   return os.str();
 }
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index 64030f3..2d0932a 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -109,8 +109,13 @@
 extern "C" void art_quick_invoke_interface_trampoline_with_access_check(uint32_t, void*);
 extern "C" void art_quick_invoke_static_trampoline_with_access_check(uint32_t, void*);
 extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, void*);
+
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
+// Invoke polymorphic entrypoint. Return type is dynamic and may be void, a primitive value, or
+// reference return type.
+extern "C" void art_quick_invoke_polymorphic(uint32_t, void*);
+
 // Thread entrypoints.
 extern "C" void art_quick_test_suspend();
 
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 78dad94..8ce61c1 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -106,6 +106,7 @@
       art_quick_invoke_super_trampoline_with_access_check;
   qpoints->pInvokeVirtualTrampolineWithAccessCheck =
       art_quick_invoke_virtual_trampoline_with_access_check;
+  qpoints->pInvokePolymorphic = art_quick_invoke_polymorphic;
 
   // Thread
   qpoints->pTestSuspend = art_quick_test_suspend;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 0911aeb..4d5d6de 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -133,6 +133,7 @@
   V(InvokeStaticTrampolineWithAccessCheck, void, uint32_t, void*) \
   V(InvokeSuperTrampolineWithAccessCheck, void, uint32_t, void*) \
   V(InvokeVirtualTrampolineWithAccessCheck, void, uint32_t, void*) \
+  V(InvokePolymorphic, void, uint32_t, void*) \
 \
   V(TestSuspend, void, void) \
 \
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index bf1d4ea..bbd4fba 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -27,10 +27,12 @@
 #include "imtable-inl.h"
 #include "interpreter/interpreter.h"
 #include "linear_alloc.h"
+#include "method_handles.h"
 #include "method_reference.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/method.h"
+#include "mirror/method_handle_impl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "oat_quick_method_header.h"
@@ -39,6 +41,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "debugger.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -2391,4 +2394,114 @@
                                 reinterpret_cast<uintptr_t>(method));
 }
 
+// Returns the shorty character of the return type so the caller can
+// determine how to put |result| into the expected registers. The shorty
+// type is statically known to the compiler, so it could in principle call
+// a type-specific flavor of this code path, though that would require a
+// separate entry point for each return type.
+extern "C" uintptr_t artInvokePolymorphic(
+    JValue* result,
+    mirror::Object* raw_method_handle,
+    Thread* self,
+    ArtMethod** sp)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
+
+  // Start new JNI local reference state
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedObjectAccessUnchecked soa(env);
+  ScopedJniEnvLocalRefState env_state(env);
+  const char* old_cause = self->StartAssertNoThreadSuspension("Making stack arguments safe.");
+
+  // From the instruction, get the |callsite_shorty| and expose arguments on the stack to the GC.
+  ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
+  uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
+  const DexFile::CodeItem* code = caller_method->GetCodeItem();
+  const Instruction* inst = Instruction::At(&code->insns_[dex_pc]);
+  DCHECK(inst->Opcode() == Instruction::INVOKE_POLYMORPHIC ||
+         inst->Opcode() == Instruction::INVOKE_POLYMORPHIC_RANGE);
+  const DexFile* dex_file = caller_method->GetDexFile();
+  const uint32_t proto_idx = inst->VRegH();
+  const char* shorty = dex_file->GetShorty(proto_idx);
+  const size_t shorty_length = strlen(shorty);
+  static const bool kMethodIsStatic = false;  // invoke() and invokeExact() are not static.
+  RememberForGcArgumentVisitor gc_visitor(sp, kMethodIsStatic, shorty, shorty_length, &soa);
+  gc_visitor.Visit();
+
+  // Wrap raw_method_handle in a Handle for safety.
+  StackHandleScope<5> hs(self);
+  Handle<mirror::MethodHandleImpl> method_handle(
+      hs.NewHandle(ObjPtr<mirror::MethodHandleImpl>::DownCast(MakeObjPtr(raw_method_handle))));
+  raw_method_handle = nullptr;
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  // Resolve method - it's either MethodHandle.invoke() or MethodHandle.invokeExact().
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  ArtMethod* resolved_method = linker->ResolveMethod<ClassLinker::kForceICCECheck>(self,
+                                                                                   inst->VRegB(),
+                                                                                   caller_method,
+                                                                                   kVirtual);
+  DCHECK((resolved_method ==
+          jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact)) ||
+         (resolved_method ==
+          jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invoke)));
+  if (UNLIKELY(method_handle.IsNull())) {
+    ThrowNullPointerExceptionForMethodAccess(resolved_method, InvokeType::kVirtual);
+    return static_cast<uintptr_t>('V');
+  }
+
+  Handle<mirror::Class> caller_class(hs.NewHandle(caller_method->GetDeclaringClass()));
+  Handle<mirror::MethodType> method_type(hs.NewHandle(linker->ResolveMethodType(
+      *dex_file, proto_idx,
+      hs.NewHandle<mirror::DexCache>(caller_class->GetDexCache()),
+      hs.NewHandle<mirror::ClassLoader>(caller_class->GetClassLoader()))));
+  // This implies we couldn't resolve one or more types in this method handle.
+  if (UNLIKELY(method_type.IsNull())) {
+    CHECK(self->IsExceptionPending());
+    return static_cast<uintptr_t>('V');
+  }
+
+  DCHECK_EQ(ArtMethod::NumArgRegisters(shorty) + 1u, (uint32_t)inst->VRegA());
+  DCHECK_EQ(resolved_method->IsStatic(), kMethodIsStatic);
+
+  // Fix references before constructing the shadow frame.
+  gc_visitor.FixupReferences();
+
+  // Construct shadow frame placing arguments consecutively from |first_arg|.
+  const bool is_range = (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC_RANGE);
+  const size_t num_vregs = is_range ? inst->VRegA_4rcc() : inst->VRegA_45cc();
+  const size_t first_arg = 0;
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_vregs, /* link */ nullptr, resolved_method, dex_pc);
+  ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
+  ScopedStackedShadowFramePusher
+      frame_pusher(self, shadow_frame, StackedShadowFrameType::kShadowFrameUnderConstruction);
+  BuildQuickShadowFrameVisitor shadow_frame_builder(sp,
+                                                    kMethodIsStatic,
+                                                    shorty,
+                                                    strlen(shorty),
+                                                    shadow_frame,
+                                                    first_arg);
+  shadow_frame_builder.VisitArguments();
+
+  // Call DoInvokePolymorphic with |is_range| = true, as shadow frame has argument registers in
+  // consecutive order.
+  uint32_t unused_args[Instruction::kMaxVarArgRegs] = {};
+  uint32_t first_callee_arg = first_arg + 1;
+  const bool do_assignability_check = false;
+  if (!DoInvokePolymorphic<true /* is_range */, do_assignability_check>(self,
+                                                                        resolved_method,
+                                                                        *shadow_frame,
+                                                                        method_handle,
+                                                                        method_type,
+                                                                        unused_args,
+                                                                        first_callee_arg,
+                                                                        result)) {
+    DCHECK(self->IsExceptionPending());
+  }
+
+  return static_cast<uintptr_t>(shorty[0]);
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 6866abb..96e17da 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -121,10 +121,10 @@
                         sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
 
     // Skip across the entrypoints structures.
-
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
+
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
@@ -286,6 +286,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeSuperTrampolineWithAccessCheck,
                          pInvokeVirtualTrampolineWithAccessCheck, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeVirtualTrampolineWithAccessCheck,
+                         pInvokePolymorphic, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokePolymorphic,
                          pTestSuspend, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTestSuspend, pDeliverException, sizeof(void*));
 
diff --git a/runtime/oat.h b/runtime/oat.h
index dc103e2..685aa04 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '5', '\0' };  // alloc entrypoints change
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '6', '\0' };  // invoke-polymorphic entry
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 33c6a40..9ce2e5e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2725,6 +2725,7 @@
   QUICK_ENTRY_POINT_INFO(pInvokeStaticTrampolineWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pInvokeSuperTrampolineWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pInvokeVirtualTrampolineWithAccessCheck)
+  QUICK_ENTRY_POINT_INFO(pInvokePolymorphic)
   QUICK_ENTRY_POINT_INFO(pTestSuspend)
   QUICK_ENTRY_POINT_INFO(pDeliverException)
   QUICK_ENTRY_POINT_INFO(pThrowArrayBounds)
diff --git a/runtime/thread.h b/runtime/thread.h
index 6308851..b8c5065 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1416,7 +1416,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1540,13 +1540,14 @@
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
-    // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM.
-    uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
 
+    // Thread-local allocation pointer.
+    uint8_t* thread_local_start;
+
     size_t thread_local_objects;
 
     // Mterp jump table bases.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 715b237..25a179b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3106,19 +3106,16 @@
         break;
       }
       const uint32_t proto_idx = (is_range) ? inst->VRegH_4rcc() : inst->VRegH_45cc();
-      const char* descriptor =
+      const char* return_descriptor =
           dex_file_->GetReturnTypeDescriptor(dex_file_->GetProtoId(proto_idx));
       const RegType& return_type =
-          reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
+          reg_types_.FromDescriptor(GetClassLoader(), return_descriptor, false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(this, return_type);
       } else {
         work_line_->SetResultRegisterTypeWide(return_type, return_type.HighHalf(&reg_types_));
       }
-      // TODO(oth): remove when compiler support is available.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER)
-          << "invoke-polymorphic is not supported by compiler";
-      have_pending_experimental_failure_ = true;
+      just_set_result = true;
       break;
     }
     case Instruction::NEG_INT: