Merge "ART: Update the DecodedInstruction for the Fused extended bytecodes."
diff --git a/Android.mk b/Android.mk
index a30c090..f7f65ac 100644
--- a/Android.mk
+++ b/Android.mk
@@ -97,7 +97,7 @@
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
 ART_HOST_DEPENDENCIES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar
-ART_HOST_DEPENDENCIES += $(HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+ART_HOST_DEPENDENCIES += $(HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
 ifdef TARGET_2ND_ARCH
 ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
@@ -166,7 +166,7 @@
 	@echo test-art-host-interpreter PASSED
 
 .PHONY: test-art-host-dependencies
-test-art-host-dependencies: $(ART_HOST_TEST_DEPENDENCIES) $(HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) $(HOST_CORE_DEX_LOCATIONS)
+test-art-host-dependencies: $(ART_HOST_TEST_DEPENDENCIES) $(HOST_LIBRARY_PATH)/libarttest$(ART_HOST_SHLIB_EXTENSION) $(HOST_CORE_DEX_LOCATIONS)
 
 .PHONY: test-art-host-gtest
 test-art-host-gtest: $(ART_HOST_GTEST_TARGETS)
diff --git a/build/Android.common.mk b/build/Android.common.mk
index aa167b2..09f34b3 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -34,10 +34,14 @@
 #
 ART_BUILD_TARGET_NDEBUG ?= true
 ART_BUILD_TARGET_DEBUG ?= true
-ART_BUILD_HOST_NDEBUG ?= $(WITH_HOST_DALVIK)
-ART_BUILD_HOST_DEBUG ?= $(WITH_HOST_DALVIK)
+ART_BUILD_HOST_NDEBUG ?= true
+ART_BUILD_HOST_DEBUG ?= true
 
+ifeq ($(HOST_PREFER_32_BIT),true)
+ART_HOST_ARCH := $(HOST_2ND_ARCH)
+else
 ART_HOST_ARCH := $(HOST_ARCH)
+endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),false)
 $(info Disabling ART_BUILD_TARGET_NDEBUG)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 407269b..1bb1d56 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -250,11 +250,9 @@
   $(foreach file,$(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval $(call build-art-test,target,$(file),,)))
   $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call build-art-test,target,$(file),art/compiler,libartd-compiler)))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST),true)
-    $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),,)))
-    $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
-  endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),,)))
+  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
 endif
 
 # Used outside the art project to get a list of the current tests
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index b4c99b5..76e5af0 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -74,8 +74,6 @@
 ifeq ($(ART_BUILD_TARGET),true)
   $(eval $(call build-libarttest,target))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST),true)
-    $(eval $(call build-libarttest,host))
-  endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libarttest,host))
 endif
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index bf07ecc..fbb7eb3 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -74,7 +74,6 @@
 endif # ART_BUILD_HOST
 
 # If we aren't building the host toolchain, skip building the target core.art.
-ifeq ($(WITH_HOST_DALVIK),true)
 ifeq ($(ART_BUILD_TARGET),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := core.art
@@ -84,4 +83,3 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += $(TARGET_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
 endif # ART_BUILD_TARGET
-endif # WITH_HOST_DALVIK
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 3cf7368..9a868fc 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -273,14 +273,12 @@
 
 endef
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart-compiler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart-compiler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart-compiler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart-compiler,host,debug))
 endif
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
   $(eval $(call build-libart-compiler,target,ndebug))
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index ce95286..439dc8c 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -814,7 +814,7 @@
       }
     }
   }
-
+  *op_size = kWord;
   return RegStorage::InvalidReg();
 }
 
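The hunk above makes the register-lookup helper write kWord into its out-parameter even on the failure path, so callers never read an uninitialized size. A minimal C++ sketch of that defensive pattern, with hypothetical stand-in types (only the shape of the fix is taken from the diff):

    // Hypothetical stand-ins for ART's types; only the "always set *op_size"
    // shape mirrors the change above.
    enum OpSize { kWord, kDouble };

    struct RegStorage {
      int id;
      static RegStorage InvalidReg() { return RegStorage{-1}; }
      bool Valid() const { return id >= 0; }
    };

    RegStorage FindRegSketch(bool wide, OpSize* op_size) {
      if (wide) {
        *op_size = kDouble;
        return RegStorage{0};
      }
      *op_size = kWord;  // set even when no register is found
      return RegStorage::InvalidReg();
    }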
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ce7229b..ec165af 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -730,6 +730,7 @@
     rX86_FARG5 = fr5;
     rX86_FARG6 = fr6;
     rX86_FARG7 = fr7;
+    rs_rX86_INVOKE_TGT = rs_rDI;
   } else {
     rs_rX86_SP = rs_rX86_SP_32;
 
@@ -755,13 +756,13 @@
     rX86_FARG1 = rCX;
     rX86_FARG2 = rDX;
     rX86_FARG3 = rBX;
+    rs_rX86_INVOKE_TGT = rs_rAX;
     // TODO(64): Initialize with invalid reg
 //    rX86_ARG4 = RegStorage::InvalidReg();
 //    rX86_ARG5 = RegStorage::InvalidReg();
   }
   rs_rX86_RET0 = rs_rAX;
   rs_rX86_RET1 = rs_rDX;
-  rs_rX86_INVOKE_TGT = rs_rAX;
   rs_rX86_COUNT = rs_rCX;
   rX86_RET0 = rAX;
   rX86_RET1 = rDX;
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 49cf71b..0b7272c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
   EXPECT_EQ(80U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(78 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
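The expectation drops from 79 to 78 pointer-sized slots because pMemcmp16 is removed from QuickEntryPoints later in this change. A small sketch of what the assertion amounts to, assuming the struct is a plain sequence of 78 function pointers (the real definition lives in quick_entrypoints.h):

    // Assumption: QuickEntryPoints reduces to 78 pointer-sized fields once
    // pMemcmp16 is gone; FakeQuickEntryPoints is an illustrative stand-in.
    struct FakeQuickEntryPoints {
      void* entrypoints[78];
    };
    static_assert(sizeof(FakeQuickEntryPoints) == 78 * sizeof(void*),
                  "one pointer-sized slot per quick entrypoint");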
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
index 6a39641..925d4a2 100644
--- a/compiler/utils/arena_allocator.cc
+++ b/compiler/utils/arena_allocator.cc
@@ -139,7 +139,7 @@
     if (kUseMemSet || !kUseMemMap) {
       memset(Begin(), 0, bytes_allocated_);
     } else {
-      madvise(Begin(), bytes_allocated_, MADV_DONTNEED);
+      map_->MadviseDontNeedAndZero();
     }
     bytes_allocated_ = 0;
   }
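The raw madvise call is replaced by a MemMap helper; the same substitution appears in the GC accounting files further down. A hedged sketch of what such a helper plausibly does (an assumption about its intent, not ART's actual MemMap code):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstring>

    // Assumed behaviour of a MadviseDontNeedAndZero-style helper: on Linux,
    // MADV_DONTNEED drops the pages and later faults return zeroed memory;
    // elsewhere fall back to an explicit memset so the "zeroed" contract holds.
    inline void MadviseDontNeedAndZeroSketch(void* begin, size_t size) {
    #ifdef __linux__
      madvise(begin, size, MADV_DONTNEED);
    #else
      std::memset(begin, 0, size);
    #endif
    }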
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 03d32f0..31fcd17 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -38,7 +38,6 @@
 
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 
-ifeq ($(WITH_HOST_DALVIK),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
 LOCAL_MODULE_TAGS := optional
@@ -54,4 +53,3 @@
 include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
-endif
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index c17788e..28db711 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -36,12 +36,10 @@
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler,art/compiler,target,debug,$(dex2oat_arch)))
 endif
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
 endif
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index b4b194d..feacbde 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -98,12 +98,10 @@
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
   $(eval $(call build-libart-disassembler,target,debug))
 endif
-ifeq ($(WITH_HOST_DALVIK),true)
-  # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart-disassembler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart-disassembler,host,debug))
-  endif
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart-disassembler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart-disassembler,host,debug))
 endif
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index 7cee00e..ecf6a0b 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -28,11 +28,9 @@
   $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libcutils libartd-disassembler,art/disassembler,target,debug))
 endif
 
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libart-disassembler,art/disassembler,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_HOST_DEBUG),true)
-    $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libartd-disassembler,art/disassembler,host,debug))
-  endif
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libart-disassembler,art/disassembler,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-art-executable,oatdump,$(OATDUMP_SRC_FILES),libartd-disassembler,art/disassembler,host,debug))
 endif
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 8d532c7..c40ae7a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -158,6 +158,7 @@
 
 LIBART_COMMON_SRC_FILES += \
 	arch/context.cc \
+	arch/memcmp16.cc \
 	arch/arm/registers_arm.cc \
 	arch/arm64/registers_arm64.cc \
 	arch/x86/registers_x86.cc \
@@ -195,7 +196,8 @@
 LIBART_GCC_ONLY_SRC_FILES := \
 	interpreter/interpreter_goto_table_impl.cc
 
-LIBART_LDFLAGS := -Wl,--no-fatal-warnings
+LIBART_TARGET_LDFLAGS := -Wl,--no-fatal-warnings
+LIBART_HOST_LDFLAGS :=
 
 LIBART_TARGET_SRC_FILES := \
 	$(LIBART_COMMON_SRC_FILES) \
@@ -209,6 +211,7 @@
 	arch/arm/context_arm.cc.arm \
 	arch/arm/entrypoints_init_arm.cc \
 	arch/arm/jni_entrypoints_arm.S \
+	arch/arm/memcmp16_arm.S \
 	arch/arm/portable_entrypoints_arm.S \
 	arch/arm/quick_entrypoints_arm.S \
 	arch/arm/arm_sdiv.S \
@@ -254,6 +257,7 @@
 	arch/mips/context_mips.cc \
 	arch/mips/entrypoints_init_mips.cc \
 	arch/mips/jni_entrypoints_mips.S \
+	arch/mips/memcmp16_mips.S \
 	arch/mips/portable_entrypoints_mips.S \
 	arch/mips/quick_entrypoints_mips.S \
 	arch/mips/thread_mips.cc \
@@ -365,6 +369,11 @@
 
   LOCAL_CFLAGS := $(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $(LIBART_LDFLAGS)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_LDFLAGS += $(LIBART_TARGET_LDFLAGS)
+  else
+    LOCAL_LDFLAGS += $(LIBART_HOST_LDFLAGS)
+  endif
   $(foreach arch,$(ART_SUPPORTED_ARCH),
     LOCAL_LDFLAGS_$(arch) := $$(LIBART_TARGET_LDFLAGS_$(arch)))
 
@@ -439,13 +448,11 @@
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
-ifeq ($(WITH_HOST_DALVIK),true)
-  ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart,host,ndebug))
-  endif
-  ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart,host,debug))
-  endif
+ifeq ($(ART_BUILD_NDEBUG),true)
+  $(eval $(call build-libart,host,ndebug))
+endif
+ifeq ($(ART_BUILD_DEBUG),true)
+  $(eval $(call build-libart,host,debug))
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index c4f68af..e1b0ce7 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
-#define ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
+#ifndef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
 
 #include "asm_support_arm.h"
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 340a83e..ebceb63 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -102,7 +102,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -213,7 +212,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm/memcmp16_arm.S b/runtime/arch/arm/memcmp16_arm.S
new file mode 100644
index 0000000..3762194
--- /dev/null
+++ b/runtime/arch/arm/memcmp16_arm.S
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+
+#include "asm_support_arm.S"
+
+/*
+ * Optimized memcmp16() for ARM9.
+ * This would not be optimal on XScale or ARM11, where more prefetching
+ * and use of pld will be needed.
+ * The 2 major optimizations here are
+ * (1) The main loop compares 16 bytes at a time
+ * (2) The loads are scheduled in a way they won't stall
+ */
+
+ARM_ENTRY __memcmp16
+        pld         [r0, #0]
+        pld         [r1, #0]
+
+        /* take care of the case where the length is zero or the buffers are the same */
+        cmp         r0, r1
+        cmpne       r2, #0
+        moveq       r0, #0
+        bxeq        lr
+
+        /* since r0 holds the result, move the first source
+         * pointer somewhere else
+         */
+
+        mov         r3, r0
+
+         /* make sure we have at least 12 words, this simplifies things below
+          * and avoids some overhead for small blocks
+          */
+
+        cmp         r2, #12
+        bpl         0f
+
+        /* small blocks (less than 12 words) */
+        pld         [r0, #32]
+        pld         [r1, #32]
+
+1:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bxne        lr
+        subs        r2, r2, #1
+        bne         1b
+        bx          lr
+
+
+        /* save registers */
+0:      stmfd       sp!, {r4, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r4, 0
+        .cfi_rel_offset lr, 4
+
+        /* align first pointer to word boundary */
+        tst         r3, #2
+        beq         0f
+
+        ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        sub         r2, r2, #1
+        subs        r0, r0, ip
+        /* restore registers and return */
+        ldmnefd     sp!, {r4, lr}
+        bxne        lr
+
+
+0:      /* here the first pointer is aligned, and we have at least 3 words
+         * to process.
+         */
+
+        /* see if the pointers are congruent */
+        eor         r0, r3, r1
+        ands        r0, r0, #2
+        bne         5f
+
+        /* congruent case, 16 half-words per iteration
+         * We need to make sure there are at least 16+2 words left
+         * because we effectively read ahead one long word, and we could
+         * read past the buffer (and segfault) if we're not careful.
+         */
+
+        ldr         ip, [r1]
+        subs        r2, r2, #(16 + 2)
+        bmi         1f
+
+0:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        ldr         r0, [r3], #4
+        ldr         lr, [r1, #4]!
+        eors        r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        bne         2f
+        subs        r2, r2, #16
+        bhs         0b
+
+        /* do we have at least 2 words left? */
+1:      adds        r2, r2, #(16 - 2 + 2)
+        bmi         4f
+
+        /* finish off 2 words at a time */
+3:      ldr         r0, [r3], #4
+        ldr         ip, [r1], #4
+        eors        r0, r0, ip
+        bne         2f
+        subs        r2, r2, #2
+        bhs         3b
+
+        /* are we done? */
+4:      adds        r2, r2, #2
+        bne         8f
+        /* restore registers and return */
+        mov         r0, #0
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+2:      /* the last 2 words are different, restart them */
+        ldrh        r0, [r3, #-4]
+        ldrh        ip, [r1, #-4]
+        subs        r0, r0, ip
+        ldreqh      r0, [r3, #-2]
+        ldreqh      ip, [r1, #-2]
+        subeqs      r0, r0, ip
+        /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+        /* process the last few words */
+8:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bne         9f
+        subs        r2, r2, #1
+        bne         8b
+
+9:      /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+
+5:      /*************** non-congruent case ***************/
+
+        /* align the unaligned pointer */
+        bic         r1, r1, #3
+        ldr         lr, [r1], #4
+        sub         r2, r2, #8
+
+6:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        mov         ip, lr, lsr #16
+        ldr         lr, [r1], #4
+        ldr         r0, [r3], #4
+        orr         ip, ip, lr, lsl #16
+        eors        r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        bne         7f
+        subs        r2, r2, #8
+        bhs         6b
+        sub         r1, r1, #2
+        /* are we done? */
+        adds        r2, r2, #8
+        moveq       r0, #0
+        beq         9b
+        /* finish off the remaining bytes */
+        b           8b
+
+7:      /* fix up the 2 pointers and fallthrough... */
+        sub         r1, r1, #2
+        b           2b
+END __memcmp16
+
+
+#endif  // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 46e819e..84ee778 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -85,7 +85,6 @@
 extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -199,7 +198,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9a877f6..6031e25 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1662,7 +1662,7 @@
      *    x1:   comp object pointer
      *
      */
-    .extern __memcmp16
+    .extern memcmp16_generic_static
 ENTRY art_quick_string_compareto
     mov    x2, x0         // x0 is return, use x2 for first input.
     sub    x0, x2, x1     // Same string object?
@@ -1758,7 +1758,7 @@
 
     mov x0, x2
     uxtw x2, w3
-    bl __memcmp16
+    bl memcmp16_generic_static
 
     ldr x1, [sp], #16            // Restore old x0 = length diff
 
diff --git a/runtime/arch/memcmp16.cc b/runtime/arch/memcmp16.cc
new file mode 100644
index 0000000..7928085
--- /dev/null
+++ b/runtime/arch/memcmp16.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memcmp16.h"
+
+// This is linked against by assembly stubs only.
+#pragma GCC diagnostic ignored "-Wunused-function"
+
+int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+#pragma GCC diagnostic warning "-Wunused-function"
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
new file mode 100644
index 0000000..ad58588
--- /dev/null
+++ b/runtime/arch/memcmp16.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MEMCMP16_H_
+#define ART_RUNTIME_ARCH_MEMCMP16_H_
+
+#include <cstddef>
+#include <cstdint>
+
+// memcmp16 support.
+//
+// This can either be optimized assembly code, in which case we expect a function __memcmp16,
+// or generic C support.
+//
+// In case of the generic support we declare two versions: one in this header file meant to be
+// inlined, and a static version that assembly stubs can link against.
+//
+// In both cases, MemCmp16 is declared.
+
+#if defined(__arm__) || defined(__mips)
+
+extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
+#define MemCmp16 __memcmp16
+
+#else
+
+// This is the generic inlined version.
+static inline int32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+extern "C" int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count);
+#endif
+
+#endif  // ART_RUNTIME_ARCH_MEMCMP16_H_
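A short usage sketch of the MemCmp16 contract declared above: compare count 16-bit units and return the signed difference at the first mismatch, zero if the buffers are equal (the helper name below is local to the example):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Same loop as the generic inline version above, renamed to keep the
    // example self-contained.
    static int32_t MemCmp16Sketch(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; i++) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }

    int main() {
      const uint16_t a[] = {u'a', u'b', u'c'};
      const uint16_t b[] = {u'a', u'b', u'd'};
      assert(MemCmp16Sketch(a, a, 3) == 0);  // identical buffers
      assert(MemCmp16Sketch(a, b, 3) < 0);   // 'c' < 'd'
      return 0;
    }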
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 500a2eb..08caa80 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -103,7 +103,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -216,7 +215,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S
new file mode 100644
index 0000000..0196edc
--- /dev/null
+++ b/runtime/arch/mips/memcmp16_mips.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+#define ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+
+#include "asm_support_mips.S"
+
+// u4 __memcmp16(const u2*, const u2*, size_t);
+ENTRY __memcmp16
+  li  $t0,0
+  li  $t1,0
+  beqz  $a2,done   /* 0 length string */
+  beq $a0,$a1,done    /* strings are identical */
+
+  /* Unoptimised... */
+1:  lhu $t0,0($a0)
+  lhu $t1,0($a1)
+  addu  $a1,2
+  bne $t0,$t1,done
+  addu  $a0,2
+  subu  $a2,1
+  bnez  $a2,1b
+
+done:
+  subu  $v0,$t0,$t1
+  j $ra
+END __memcmp16
+
+#endif  // ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 3be0faf..59311bc 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -77,9 +77,10 @@
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "pushl %[referrer]\n\t"     // Store referrer
+        "subl $12, %%esp\n\t"       // Align stack.
+        "pushl %[referrer]\n\t"     // Store referrer.
         "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"            // Pop referrer
+        "addl $16, %%esp"           // Pop referrer
         : "=a" (result)
           // Use the result from eax
         : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
@@ -300,9 +301,10 @@
     // TODO: Set the thread?
     __asm__ __volatile__(
         "movd %[hidden], %%xmm0\n\t"
+        "subl $12, %%esp\n\t"       // Align stack.
         "pushl %[referrer]\n\t"     // Store referrer
         "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"            // Pop referrer
+        "addl $16, %%esp"           // Pop referrer
         : "=a" (result)
           // Use the result from eax
         : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
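Both inline-asm blocks gain 12 bytes of padding before the 4-byte referrer push and pop 16 bytes afterwards, presumably to satisfy the 16-byte stack alignment newer i386 ABIs expect at a call. Assuming %esp is 16-byte aligned when the block is entered, the arithmetic is just:

    // Assumption: the stack is 16-byte aligned on entry to the asm block.
    constexpr int kAlignmentPadding = 12;  // "subl $12, %esp"
    constexpr int kReferrerSlot = 4;       // "pushl %[referrer]"
    static_assert((kAlignmentPadding + kReferrerSlot) % 16 == 0,
                  "padding plus referrer push keeps the stack 16-byte aligned");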
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index f1d0746..ae39be1 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -28,6 +28,7 @@
     #define END_MACRO .endmacro
 
     // Clang's as(1) uses $0, $1, and so on for macro arguments.
+    #define RAW_VAR(name,index) $index
     #define VAR(name,index) SYMBOL($index)
     #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
@@ -50,6 +51,7 @@
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
     // special character meaning care needs to be taken when passing registers as macro arguments.
     .altmacro
+    #define RAW_VAR(name,index) name&
     #define VAR(name,index) name&
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
@@ -94,7 +96,7 @@
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
     #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
-        // TODO: Disabled for old clang 3.3, this leads to text reolocations and there should be a
+        // TODO: Disabled for old clang 3.3, this leads to text relocations and there should be a
         // better fix.
         #define PLT_SYMBOL(name) name // ## @PLT
     #else
@@ -151,8 +153,10 @@
 END_MACRO
 
 MACRO0(SETUP_GOT_NOSAVE)
+#ifndef __APPLE__
     call __x86.get_pc_thunk.bx
     addl $_GLOBAL_OFFSET_TABLE_, %ebx
+#endif
 END_MACRO
 
 MACRO0(SETUP_GOT)
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c53fa1e..c30dca1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -81,7 +81,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -194,7 +193,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = nullptr;  // Not needed on x86
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 989ecf9..28e4dd6 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -111,7 +111,7 @@
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
@@ -123,11 +123,11 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
@@ -139,11 +139,11 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %edx
     // Outgoing argument set up
@@ -155,7 +155,7 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
     int3                          // unreached
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
     /*
@@ -207,7 +207,7 @@
      * pointing back to the original caller.
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     // Set up the callee save frame to conform with Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
     // return address
     PUSH edi
@@ -248,7 +248,7 @@
     addl MACRO_LITERAL(4), %esp   // Pop code pointer off stack
     CFI_ADJUST_CFA_OFFSET(-4)
     DELIVER_PENDING_EXCEPTION
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
@@ -315,7 +315,7 @@
 END_FUNCTION art_quick_invoke_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
@@ -330,11 +330,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers EBX
@@ -349,11 +349,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %edx                // remember SP
     SETUP_GOT_NOSAVE              // clobbers EBX
@@ -368,11 +368,11 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %ebx                // remember SP
     // Outgoing argument set up
@@ -390,7 +390,7 @@
     CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
+    END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
@@ -653,17 +653,17 @@
      */
 DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
     testl %eax, %eax
-    jnz art_quick_aput_obj_with_bound_check
-    jmp art_quick_throw_null_pointer_exception
+    jnz SYMBOL(art_quick_aput_obj_with_bound_check)
+    jmp SYMBOL(art_quick_throw_null_pointer_exception)
 END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
 
 DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
     movl ARRAY_LENGTH_OFFSET(%eax), %ebx
     cmpl %ebx, %ecx
-    jb art_quick_aput_obj
+    jb SYMBOL(art_quick_aput_obj)
     mov %ecx, %eax
     mov %ebx, %ecx
-    jmp art_quick_throw_array_bounds
+    jmp SYMBOL(art_quick_throw_array_bounds)
 END_FUNCTION art_quick_aput_obj_with_bound_check
 
 DEFINE_FUNCTION art_quick_aput_obj
@@ -1122,7 +1122,7 @@
     movd %xmm0, %ecx              // get target method index stored in xmm0
     movl OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax  // load the target method
     POP ecx
-    jmp art_quick_invoke_interface_trampoline
+    jmp SYMBOL(art_quick_invoke_interface_trampoline)
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
@@ -1218,10 +1218,10 @@
     jnz .Lexception_in_native
 
     // Tear down the callee-save frame.
-    addl MACRO_LITERAL(4), %esp     // Remove padding
+    addl LITERAL(4), %esp     // Remove padding
     CFI_ADJUST_CFA_OFFSET(-4)
     POP ecx
-    addl MACRO_LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
+    addl LITERAL(4), %esp     // Avoid edx, as it may be part of the result.
     CFI_ADJUST_CFA_OFFSET(-4)
     POP ebx
     POP ebp  // Restore callee saves
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 9f36927..b97c143 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -156,7 +156,11 @@
 
   // Free LDT entry.
 #if defined(__APPLE__)
-  i386_set_ldt(selector >> 3, 0, 1);
+  // TODO: release selectors on OS/X. Not doing so is a leak, which will cause LDT entries to be
+  // exhausted after enough threads are created. However, the following code results in kernel
+  // panics on OS/X 10.9.
+  UNUSED(selector);
+  // i386_set_ldt(selector >> 3, 0, 1);
 #else
   user_desc ldt_entry;
   memset(&ldt_entry, 0, sizeof(ldt_entry));
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index aeda072..2612417 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -80,7 +80,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -193,7 +192,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = NULL;  // Not needed on x86.
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index d20eb17..1890181 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -45,54 +45,6 @@
 }
 #endif  // ART_USE_FUTEXES
 
-#if defined(__APPLE__)
-
-// This works on Mac OS 10.6 but hasn't been tested on older releases.
-struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  int padding1;
-  uint32_t padding2;
-  int16_t padding3;
-  int16_t padding4;
-  uint32_t padding5;
-  pthread_t darwin_pthread_mutex_owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  pthread_mutex_t padding1;
-  int padding2;
-  pthread_cond_t padding3;
-  pthread_cond_t padding4;
-  int padding5;
-  int padding6;
-  pthread_t darwin_pthread_rwlock_owner;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __APPLE__
-
-#if defined(__GLIBC__)
-
-struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
-  int32_t padding0[2];
-  int owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
-#ifdef __LP64__
-  int32_t padding0[6];
-#else
-  int32_t padding0[7];
-#endif
-  int writer;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __GLIBC__
-
 class ScopedContentionRecorder {
  public:
   ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
@@ -219,12 +171,14 @@
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
 #endif
+  DCHECK(exclusive_owner_ == 0U || exclusive_owner_ == -1U);
   RegisterAsLocked(self);
   AssertSharedHeld(self);
 }
 
 inline void ReaderWriterMutex::SharedUnlock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
+  DCHECK(exclusive_owner_ == 0U || exclusive_owner_ == -1U);
   AssertSharedHeld(self);
   RegisterAsUnlocked(self);
 #if ART_USE_FUTEXES
@@ -262,26 +216,7 @@
 }
 
 inline uint64_t Mutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
   return exclusive_owner_;
-#elif defined(__BIONIC__)
-  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
-#elif defined(__APPLE__)
-  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
-  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
-  // 0 for unowned, -1 for PTHREAD_MTX_TID_SWITCHING
-  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
-  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
 }
 
 inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
@@ -307,23 +242,7 @@
     return exclusive_owner_;
   }
 #else
-#if defined(__BIONIC__)
-  return rwlock_.writerThreadId;
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
-#elif defined(__APPLE__)
-  const darwin_pthread_rwlock_t*
-      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
-  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
-  if (owner == (pthread_t)0) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
+  return exclusive_owner_;
 #endif
 }
 
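With the libc-specific structure peeking gone, the owner tid now comes from an exclusive_owner_ field the mutex maintains itself (added to mutex.h below). A hedged sketch of that pattern around a plain pthread_mutex_t, with simplified names:

    #include <pthread.h>
    #include <cstdint>

    // Simplified stand-in for ART's Mutex: record the owner tid next to the
    // pthread mutex instead of reaching into libc-private internals.
    class OwnedMutexSketch {
     public:
      OwnedMutexSketch() : exclusive_owner_(0) { pthread_mutex_init(&mutex_, nullptr); }
      ~OwnedMutexSketch() { pthread_mutex_destroy(&mutex_); }

      void Lock(uint64_t tid) {
        pthread_mutex_lock(&mutex_);
        exclusive_owner_ = tid;        // written only while holding mutex_
      }
      void Unlock() {
        exclusive_owner_ = 0;          // cleared before the release
        pthread_mutex_unlock(&mutex_);
      }
      uint64_t GetExclusiveOwnerTid() const { return exclusive_owner_; }

     private:
      pthread_mutex_t mutex_;
      volatile uint64_t exclusive_owner_;  // guarded by mutex_
    };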
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index aeece74..fd1eb12 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -263,19 +263,11 @@
     : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
 #if ART_USE_FUTEXES
   state_ = 0;
-  exclusive_owner_ = 0;
   DCHECK_EQ(0, num_contenders_.LoadRelaxed());
-#elif defined(__BIONIC__) || defined(__APPLE__)
-  // Use recursive mutexes for bionic and Apple otherwise the
-  // non-recursive mutexes don't have TIDs to check lock ownership of.
-  pthread_mutexattr_t attributes;
-  CHECK_MUTEX_CALL(pthread_mutexattr_init, (&attributes));
-  CHECK_MUTEX_CALL(pthread_mutexattr_settype, (&attributes, PTHREAD_MUTEX_RECURSIVE));
-  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, &attributes));
-  CHECK_MUTEX_CALL(pthread_mutexattr_destroy, (&attributes));
 #else
-  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, NULL));
+  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
 #endif
+  exclusive_owner_ = 0;
 }
 
 Mutex::~Mutex() {
@@ -336,10 +328,11 @@
     // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation
     // that exposes the ordering semantics.
     DCHECK_EQ(state_, 1);
-    exclusive_owner_ = SafeGetTid(self);
 #else
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
 #endif
+    DCHECK_EQ(exclusive_owner_, 0U);
+    exclusive_owner_ = SafeGetTid(self);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -369,7 +362,6 @@
     } while (!done);
     // We again assert no memory fence is needed.
     DCHECK_EQ(state_, 1);
-    exclusive_owner_ = SafeGetTid(self);
 #else
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
@@ -380,6 +372,8 @@
       PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
     }
 #endif
+    DCHECK_EQ(exclusive_owner_, 0U);
+    exclusive_owner_ = SafeGetTid(self);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -394,6 +388,7 @@
 void Mutex::ExclusiveUnlock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
   AssertHeld(self);
+  DCHECK_NE(exclusive_owner_, 0U);
   recursion_count_--;
   if (!recursive_ || recursion_count_ == 0) {
     if (kDebugLocking) {
@@ -402,34 +397,35 @@
     }
     RegisterAsUnlocked(self);
 #if ART_USE_FUTEXES
-  bool done = false;
-  do {
-    int32_t cur_state = state_;
-    if (LIKELY(cur_state == 1)) {
-      // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
-      // We're no longer the owner.
-      exclusive_owner_ = 0;
-      // Change state to 0.
-      done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
-      if (LIKELY(done)) {  // Spurious fail?
-        // Wake a contender
-        if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
-          futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+    bool done = false;
+    do {
+      int32_t cur_state = state_;
+      if (LIKELY(cur_state == 1)) {
+        // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
+        // We're no longer the owner.
+        exclusive_owner_ = 0;
+        // Change state to 0.
+        done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
+        if (LIKELY(done)) {  // Spurious fail?
+          // Wake a contender
+          if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
+            futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+          }
+        }
+      } else {
+        // Logging acquires the logging lock, avoid infinite recursion in that case.
+        if (this != Locks::logging_lock_) {
+          LOG(FATAL) << "Unexpected state_ in unlock " << cur_state << " for " << name_;
+        } else {
+          LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
+          LogMessage::LogLine(data, StringPrintf("Unexpected state_ %d in unlock for %s",
+                                                 cur_state, name_).c_str());
+          _exit(1);
         }
       }
-    } else {
-      // Logging acquires the logging lock, avoid infinite recursion in that case.
-      if (this != Locks::logging_lock_) {
-        LOG(FATAL) << "Unexpected state_ in unlock " << cur_state << " for " << name_;
-      } else {
-        LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
-        LogMessage::LogLine(data, StringPrintf("Unexpected state_ %d in unlock for %s",
-                                               cur_state, name_).c_str());
-        _exit(1);
-      }
-    }
-  } while (!done);
+    } while (!done);
 #else
+    exclusive_owner_ = 0;
     CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
 #endif
   }
@@ -452,12 +448,13 @@
 ReaderWriterMutex::ReaderWriterMutex(const char* name, LockLevel level)
     : BaseMutex(name, level)
 #if ART_USE_FUTEXES
-    , state_(0), exclusive_owner_(0), num_pending_readers_(0), num_pending_writers_(0)
+    , state_(0), num_pending_readers_(0), num_pending_writers_(0)
 #endif
 {  // NOLINT(whitespace/braces)
 #if !ART_USE_FUTEXES
-  CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, NULL));
+  CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, nullptr));
 #endif
+  exclusive_owner_ = 0;
 }
 
 ReaderWriterMutex::~ReaderWriterMutex() {
@@ -506,10 +503,11 @@
     }
   } while (!done);
   DCHECK_EQ(state_, -1);
-  exclusive_owner_ = SafeGetTid(self);
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
 #endif
+  DCHECK_EQ(exclusive_owner_, 0U);
+  exclusive_owner_ = SafeGetTid(self);
   RegisterAsLocked(self);
   AssertExclusiveHeld(self);
 }
@@ -518,6 +516,7 @@
   DCHECK(self == NULL || self == Thread::Current());
   AssertExclusiveHeld(self);
   RegisterAsUnlocked(self);
+  DCHECK_NE(exclusive_owner_, 0U);
 #if ART_USE_FUTEXES
   bool done = false;
   do {
@@ -538,6 +537,7 @@
     }
   } while (!done);
 #else
+  exclusive_owner_ = 0;
   CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
 #endif
 }
@@ -578,7 +578,6 @@
       num_pending_writers_--;
     }
   } while (!done);
-  exclusive_owner_ = SafeGetTid(self);
 #else
   timespec ts;
   InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &ts);
@@ -591,6 +590,7 @@
     PLOG(FATAL) << "pthread_rwlock_timedwrlock failed for " << name_;
   }
 #endif
+  exclusive_owner_ = SafeGetTid(self);
   RegisterAsLocked(self);
   AssertSharedHeld(self);
   return true;
@@ -656,7 +656,7 @@
   num_waiters_ = 0;
 #else
   pthread_condattr_t cond_attrs;
-  CHECK_MUTEX_CALL(pthread_condattr_init(&cond_attrs));
+  CHECK_MUTEX_CALL(pthread_condattr_init, (&cond_attrs));
 #if !defined(__APPLE__)
   // Apple doesn't have CLOCK_MONOTONIC or pthread_condattr_setclock.
   CHECK_MUTEX_CALL(pthread_condattr_setclock(&cond_attrs, CLOCK_MONOTONIC));
@@ -763,8 +763,11 @@
   CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
+  uint64_t old_owner = guard_.exclusive_owner_;
+  guard_.exclusive_owner_ = 0;
   guard_.recursion_count_ = 0;
   CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &guard_.mutex_));
+  guard_.exclusive_owner_ = old_owner;
 #endif
   guard_.recursion_count_ = old_recursion_count;
 }
@@ -804,6 +807,8 @@
 #else
   int clock = CLOCK_REALTIME;
 #endif
+  uint64_t old_owner = guard_.exclusive_owner_;
+  guard_.exclusive_owner_ = 0;
   guard_.recursion_count_ = 0;
   timespec ts;
   InitTimeSpec(true, clock, ms, ns, &ts);
@@ -812,6 +817,7 @@
     errno = rc;
     PLOG(FATAL) << "TimedWait failed for " << name_;
   }
+  guard_.exclusive_owner_ = old_owner;
 #endif
   guard_.recursion_count_ = old_recursion_count;
 }
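The ConditionVariable changes follow from the same bookkeeping: pthread_cond_wait releases the guard mutex, so the recorded owner has to be cleared for the duration of the wait and put back afterwards. A sketch of that save/restore with illustrative field names:

    #include <pthread.h>
    #include <cstdint>

    // Illustrative guard type; ART's Mutex keeps the same three pieces of state.
    struct GuardSketch {
      pthread_mutex_t mutex_;
      volatile uint64_t exclusive_owner_;
      int recursion_count_;
    };

    void WaitHoldingGuard(pthread_cond_t* cond, GuardSketch* guard) {
      uint64_t old_owner = guard->exclusive_owner_;
      int old_recursion_count = guard->recursion_count_;
      guard->exclusive_owner_ = 0;   // the mutex is released inside the wait
      guard->recursion_count_ = 0;
      pthread_cond_wait(cond, &guard->mutex_);
      guard->exclusive_owner_ = old_owner;          // we hold the mutex again
      guard->recursion_count_ = old_recursion_count;
    }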
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 68b450a..1ba6180 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -245,6 +245,7 @@
   AtomicInteger num_contenders_;
 #else
   pthread_mutex_t mutex_;
+  volatile uint64_t exclusive_owner_;  // Guarded by mutex_.
 #endif
   const bool recursive_;  // Can the lock be recursively held?
   unsigned int recursion_count_;
@@ -358,6 +359,7 @@
   AtomicInteger num_pending_writers_;
 #else
   pthread_rwlock_t rwlock_;
+  volatile uint64_t exclusive_owner_;  // Guarded by rwlock_.
 #endif
   DISALLOW_COPY_AND_ASSIGN(ReaderWriterMutex);
 };
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 87d1c06..6d5b59c 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -69,17 +69,29 @@
 }
 
 int FdFile::Flush() {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(fdatasync(fd_));
+#else
+  int rc = TEMP_FAILURE_RETRY(fsync(fd_));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
 int64_t FdFile::Read(char* buf, int64_t byte_count, int64_t offset) const {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pread64(fd_, buf, byte_count, offset));
+#else
+  int rc = TEMP_FAILURE_RETRY(pread(fd_, buf, byte_count, offset));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
 int FdFile::SetLength(int64_t new_length) {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(ftruncate64(fd_, new_length));
+#else
+  int rc = TEMP_FAILURE_RETRY(ftruncate(fd_, new_length));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
@@ -90,7 +102,11 @@
 }
 
 int64_t FdFile::Write(const char* buf, int64_t byte_count, int64_t offset) {
+#ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pwrite64(fd_, buf, byte_count, offset));
+#else
+  int rc = TEMP_FAILURE_RETRY(pwrite(fd_, buf, byte_count, offset));
+#endif
   return (rc == -1) ? -errno : rc;
 }
 
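The pattern here repeats for Flush, Read, SetLength and Write: Linux spells the explicit 64-bit-offset calls pread64/pwrite64/ftruncate64, while other hosts (e.g. Mac OS) only provide the off_t variants, which are already 64-bit there. A sketch of one such wrapper; TEMP_FAILURE_RETRY is assumed available, with a GNU-style fallback included:

    #include <unistd.h>
    #include <cerrno>
    #include <cstdint>

    #ifndef TEMP_FAILURE_RETRY  // fallback for libcs that lack it (GNU statement expression)
    #define TEMP_FAILURE_RETRY(exp) ({                              \
      decltype(exp) _rc;                                            \
      do { _rc = (exp); } while (_rc == -1 && errno == EINTR);      \
      _rc; })
    #endif

    // Mirrors FdFile::Read above; availability of pread64 depends on the libc.
    static int64_t PortableRead(int fd, char* buf, int64_t byte_count, int64_t offset) {
    #ifdef __linux__
      int rc = TEMP_FAILURE_RETRY(pread64(fd, buf, byte_count, offset));
    #else
      int rc = TEMP_FAILURE_RETRY(pread(fd, buf, byte_count, offset));
    #endif
      return (rc == -1) ? -errno : rc;
    }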
diff --git a/runtime/base/unix_file/mapped_file.cc b/runtime/base/unix_file/mapped_file.cc
index bc23a74..63927b1 100644
--- a/runtime/base/unix_file/mapped_file.cc
+++ b/runtime/base/unix_file/mapped_file.cc
@@ -61,7 +61,11 @@
 bool MappedFile::MapReadWrite(int64_t file_size) {
   CHECK(IsOpened());
   CHECK(!IsMapped());
+#ifdef __linux__
   int result = TEMP_FAILURE_RETRY(ftruncate64(Fd(), file_size));
+#else
+  int result = TEMP_FAILURE_RETRY(ftruncate(Fd(), file_size));
+#endif
   if (result == -1) {
     PLOG(ERROR) << "Failed to truncate file '" << GetPath()
                 << "' to size " << file_size;
diff --git a/runtime/base/unix_file/mapped_file.h b/runtime/base/unix_file/mapped_file.h
index 28cc551..73056e9 100644
--- a/runtime/base/unix_file/mapped_file.h
+++ b/runtime/base/unix_file/mapped_file.h
@@ -32,8 +32,13 @@
  public:
   // File modes used in Open().
   enum FileMode {
+#ifdef __linux__
     kReadOnlyMode = O_RDONLY | O_LARGEFILE,
     kReadWriteMode = O_CREAT | O_RDWR | O_LARGEFILE,
+#else
+    kReadOnlyMode = O_RDONLY,
+    kReadWriteMode = O_CREAT | O_RDWR,
+#endif
   };
 
   MappedFile() : FdFile(), file_size_(-1), mapped_file_(NULL) {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 73ed590..a0cecb0 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3430,6 +3430,7 @@
   auto old_throw_method = hs.NewHandle<mirror::ArtMethod>(nullptr);
   auto old_exception = hs.NewHandle<mirror::Throwable>(nullptr);
   uint32_t old_throw_dex_pc;
+  bool old_exception_report_flag;
   {
     ThrowLocation old_throw_location;
     mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
@@ -3437,6 +3438,7 @@
     old_throw_method.Assign(old_throw_location.GetMethod());
     old_exception.Assign(old_exception_obj);
     old_throw_dex_pc = old_throw_location.GetDexPc();
+    old_exception_report_flag = soa.Self()->IsExceptionReportedToInstrumentation();
     soa.Self()->ClearException();
   }
 
@@ -3491,6 +3493,7 @@
     ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
     soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
+    soa.Self()->SetExceptionReportedToInstrumentation(old_exception_report_flag);
   }
 }
 
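The new flag travels with the pending exception so instrumentation does not see the same throw reported twice after the debugger re-installs it. A hedged sketch of the save/restore as an RAII guard, with simplified stand-in types (ART's Thread/Throwable are not reproduced here):

    // FakeThread stands in for art::Thread; only the two pieces of state the
    // diff saves and restores are modelled.
    struct FakeThread {
      void* pending_exception = nullptr;
      bool exception_reported_to_instrumentation = false;
    };

    class ScopedExceptionStash {
     public:
      explicit ScopedExceptionStash(FakeThread* self)
          : self_(self),
            saved_exception_(self->pending_exception),
            saved_report_flag_(self->exception_reported_to_instrumentation) {
        self_->pending_exception = nullptr;  // cleared while other work runs
      }
      ~ScopedExceptionStash() {
        self_->pending_exception = saved_exception_;
        self_->exception_reported_to_instrumentation = saved_report_flag_;  // restore the flag too
      }

     private:
      FakeThread* self_;
      void* saved_exception_;
      bool saved_report_flag_;
    };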
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 17d1ffc..52cece6 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -120,7 +120,8 @@
   if (UNLIKELY((range_start < file_start) || (range_start > file_end) ||
                (range_end < file_start) || (range_end > file_end))) {
     ErrorStringPrintf("Bad range for %s: %zx to %zx", label,
-                      range_start - file_start, range_end - file_start);
+                      static_cast<size_t>(range_start - file_start),
+                      static_cast<size_t>(range_end - file_start));
     return false;
   }
   return true;
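Subtracting two pointers yields a signed ptrdiff_t, while the %zx conversion expects size_t; across the 32-bit and 64-bit hosts this change enables, the two are not interchangeable, hence the casts. A minimal illustration:

    #include <cstdio>
    #include <cstddef>

    // Pointer differences are ptrdiff_t (signed); %zx wants size_t (unsigned),
    // so the value is cast explicitly before formatting.
    void PrintRange(const char* file_start, const char* range_start, const char* range_end) {
      std::printf("Bad range: %zx to %zx\n",
                  static_cast<size_t>(range_start - file_start),
                  static_cast<size_t>(range_end - file_start));
    }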
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index f3ec713..f160dc4 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -17,9 +17,8 @@
 #ifndef ART_RUNTIME_ELF_UTILS_H_
 #define ART_RUNTIME_ELF_UTILS_H_
 
-// Include the micro-API to avoid potential macro conflicts with the
-// compiler's own elf.h file.
-#include "../../bionic/libc/kernel/uapi/linux/elf.h"
+// Explicitly include elf.h from elfutils to avoid Linux and other dependencies.
+#include "../../external/elfutils/0.153/libelf/elf.h"
 
 // Architecture dependent flags for the ELF header.
 #define EF_ARM_EABI_VER5 0x05000000
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 09899c0..3d8b29f 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -652,6 +652,7 @@
   // Save any pending exception over monitor exit call.
   mirror::Throwable* saved_exception = NULL;
   ThrowLocation saved_throw_location;
+  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
   if (UNLIKELY(self->IsExceptionPending())) {
     saved_exception = self->GetException(&saved_throw_location);
     self->ClearException();
@@ -667,6 +668,7 @@
   // Restore pending exception.
   if (saved_exception != NULL) {
     self->SetException(saved_throw_location, saved_exception);
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
 }
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 7bd1582..469d373 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -115,7 +115,6 @@
 
   // Intrinsics
   int32_t (*pIndexOf)(void*, uint32_t, uint32_t, uint32_t);
-  int32_t (*pMemcmp16)(void*, void*, int32_t);
   int32_t (*pStringCompareTo)(void*, void*);
   void* (*pMemcpy)(void*, const void*, size_t);
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f33befb..0dd33cf 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -233,8 +233,7 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShlLong, pShrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShrLong, pUshrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUshrLong, pIndexOf, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pMemcmp16, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcmp16, pStringCompareTo, kPointerSize);
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pStringCompareTo, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringCompareTo, pMemcpy, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcpy, pQuickImtConflictTrampoline, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickImtConflictTrampoline, pQuickResolutionTrampoline,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 6b216c7..3112bc0 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -67,7 +67,7 @@
   action.sa_sigaction = art_fault_handler;
   sigemptyset(&action.sa_mask);
   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__mips__)
+#if !defined(__APPLE__) && !defined(__mips__)
   action.sa_restorer = nullptr;
 #endif
 
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index bd04473..2c72ba1 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -49,10 +49,7 @@
     front_index_.StoreRelaxed(0);
     back_index_.StoreRelaxed(0);
     debug_is_sorted_ = true;
-    int result = madvise(begin_, sizeof(T) * capacity_, MADV_DONTNEED);
-    if (result == -1) {
-      PLOG(WARNING) << "madvise failed";
-    }
+    mem_map_->MadviseDontNeedAndZero();
   }
 
   // Beware: Mixing atomic pushes and atomic pops will cause ABA problem.
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 43a173e..a95c003 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -96,7 +96,7 @@
 
 void CardTable::ClearCardTable() {
   COMPILE_ASSERT(kCardClean == 0, clean_card_must_be_0);
-  madvise(mem_map_->Begin(), mem_map_->Size(), MADV_DONTNEED);
+  mem_map_->MadviseDontNeedAndZero();
 }
 
 bool CardTable::AddrIsInCardTable(const void* addr) const {
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index c294bae..224b33e 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -79,12 +79,8 @@
 
 template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::Clear() {
-  if (bitmap_begin_ != NULL) {
-    // This returns the memory to the system.  Successive page faults will return zeroed memory.
-    int result = madvise(bitmap_begin_, bitmap_size_, MADV_DONTNEED);
-    if (result == -1) {
-      PLOG(FATAL) << "madvise failed";
-    }
+  if (bitmap_begin_ != nullptr) {
+    mem_map_->MadviseDontNeedAndZero();
   }
 }
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 10b88b3..55262f2 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1507,6 +1507,9 @@
     if (madvise_size > 0) {
       DCHECK_ALIGNED(madvise_begin, kPageSize);
       DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
+      if (!kMadviseZeroes) {
+        memset(madvise_begin, 0, madvise_size);
+      }
       CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
     }
     if (madvise_begin - zero_begin) {
@@ -2117,6 +2120,9 @@
           start = reinterpret_cast<byte*>(fpr) + kPageSize;
         }
         byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
+        if (!kMadviseZeroes) {
+          memset(start, 0, end - start);
+        }
         CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
         reclaimed_bytes += fpr_size;
         size_t num_pages = fpr_size / kPageSize;
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 9464331..a439188 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -110,11 +110,17 @@
         byte_size -= kPageSize;
         if (byte_size > 0) {
           if (release_pages) {
+            if (!kMadviseZeroes) {
+              memset(start, 0, byte_size);
+            }
             madvise(start, byte_size, MADV_DONTNEED);
           }
         }
       } else {
         if (release_pages) {
+          if (!kMadviseZeroes) {
+            memset(start, 0, byte_size);
+          }
           madvise(start, byte_size, MADV_DONTNEED);
         }
       }
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index c062706..890036b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1130,9 +1130,7 @@
   allocations->Reset();
   timings_.EndSplit();
 
-  int success = madvise(sweep_array_free_buffer_mem_map_->BaseBegin(),
-                        sweep_array_free_buffer_mem_map_->BaseSize(), MADV_DONTNEED);
-  DCHECK_EQ(success, 0) << "Failed to madvise the sweep array free buffer pages.";
+  sweep_array_free_buffer_mem_map_->MadviseDontNeedAndZero();
 }
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index fd0a92d..8b35692 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -64,6 +64,9 @@
 
 void BumpPointerSpace::Clear() {
   // Release the pages back to the operating system.
+  if (!kMadviseZeroes) {
+    memset(Begin(), 0, Limit() - Begin());
+  }
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 261c241..8f5da83 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -780,24 +780,20 @@
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          mirror::ArtField* field) const {
-  if (have_field_read_listeners_) {
-    // TODO: same comment than DexPcMovedEventImpl.
-    std::list<InstrumentationListener*> copy(field_read_listeners_);
-    for (InstrumentationListener* listener : copy) {
-      listener->FieldRead(thread, this_object, method, dex_pc, field);
-    }
+  // TODO: same comment as DexPcMovedEventImpl.
+  std::list<InstrumentationListener*> copy(field_read_listeners_);
+  for (InstrumentationListener* listener : copy) {
+    listener->FieldRead(thread, this_object, method, dex_pc, field);
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          mirror::ArtField* field, const JValue& field_value) const {
-  if (have_field_write_listeners_) {
-    // TODO: same comment than DexPcMovedEventImpl.
-    std::list<InstrumentationListener*> copy(field_write_listeners_);
-    for (InstrumentationListener* listener : copy) {
-      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
-    }
+  // TODO: same comment as DexPcMovedEventImpl.
+  std::list<InstrumentationListener*> copy(field_write_listeners_);
+  for (InstrumentationListener* listener : copy) {
+    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
   }
 }
 
@@ -805,8 +801,9 @@
                                            mirror::ArtMethod* catch_method,
                                            uint32_t catch_dex_pc,
                                            mirror::Throwable* exception_object) const {
-  if (have_exception_caught_listeners_) {
-    DCHECK_EQ(thread->GetException(NULL), exception_object);
+  if (HasExceptionCaughtListeners()) {
+    DCHECK_EQ(thread->GetException(nullptr), exception_object);
+    bool is_exception_reported = thread->IsExceptionReportedToInstrumentation();
     thread->ClearException();
     // TODO: The copy below is due to the debug listener having an action where it can remove
     // itself as a listener and break the iterator. The copy only works around the problem.
@@ -815,6 +812,7 @@
       listener->ExceptionCaught(thread, throw_location, catch_method, catch_dex_pc, exception_object);
     }
     thread->SetException(throw_location, exception_object);
+    thread->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 6625801..d0cb4de 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -237,6 +237,10 @@
     return have_field_write_listeners_;
   }
 
+  bool HasExceptionCaughtListeners() const {
+    return have_exception_caught_listeners_;
+  }
+
   bool IsActive() const {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index c6e05ae..c7fb884 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -20,6 +20,471 @@
 namespace art {
 namespace interpreter {
 
+void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame) {
+  ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+}
+
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) {
+  const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
+  const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                              Primitive::FieldSize(field_type));
+  if (UNLIKELY(f == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == nullptr)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
+      return false;
+    }
+  }
+  // Report this field access to instrumentation if needed.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
+    Object* this_object = f->IsStatic() ? nullptr : obj;
+    instrumentation->FieldReadEvent(self, this_object, shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(), f);
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
+      break;
+    case Primitive::kPrimByte:
+      shadow_frame.SetVReg(vregA, f->GetByte(obj));
+      break;
+    case Primitive::kPrimChar:
+      shadow_frame.SetVReg(vregA, f->GetChar(obj));
+      break;
+    case Primitive::kPrimShort:
+      shadow_frame.SetVReg(vregA, f->GetShort(obj));
+      break;
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, f->GetInt(obj));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoFieldGet functions.
+#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check) \
+  template bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, \
+                                                               ShadowFrame& shadow_frame, \
+                                                               const Instruction* inst, \
+                                                               uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
+
+// iget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot);
+
+// sget-XXX
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
+
+// Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<Primitive::Type field_type>
+bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == nullptr)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precised exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
+  MemberOffset field_offset(inst->VRegC_22c());
+  // Report this field access to instrumentation if needed. Since we only have the offset of
+  // the field from the base of the object, we need to look for it first.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
+    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
+                                                        field_offset.Uint32Value());
+    DCHECK(f != nullptr);
+    DCHECK(!f->IsStatic());
+    instrumentation->FieldReadEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(), f);
+  }
+  // Note: iget-x-quick instructions are only for non-volatile fields.
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset)));
+      break;
+    case Primitive::kPrimLong:
+      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset)));
+      break;
+    case Primitive::kPrimNot:
+      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoIGetQuick functions.
+#define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type) \
+  template bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst, \
+                                         uint16_t inst_data)
+
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
+EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
+#undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
+
+template<Primitive::Type field_type>
+static JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  JValue field_value;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimByte:
+      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimChar:
+      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimShort:
+      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimInt:
+      field_value.SetI(shadow_frame.GetVReg(vreg));
+      break;
+    case Primitive::kPrimLong:
+      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
+      break;
+    case Primitive::kPrimNot:
+      field_value.SetL(shadow_frame.GetVRegReference(vreg));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      break;
+  }
+  return field_value;
+}
+
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
+bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) {
+  bool do_assignability_check = do_access_check;
+  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
+  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                              Primitive::FieldSize(field_type));
+  if (UNLIKELY(f == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  Object* obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == nullptr)) {
+      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
+                                              f, false);
+      return false;
+    }
+  }
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  // Report this field modification to instrumentation if needed. The field has already been
+  // resolved above, so we can pass it to the listeners directly.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
+    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
+    Object* this_object = f->IsStatic() ? nullptr : obj;
+    instrumentation->FieldWriteEvent(self, this_object, shadow_frame.GetMethod(),
+                                     shadow_frame.GetDexPC(), f, field_value);
+  }
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimByte:
+      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimChar:
+      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimShort:
+      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimInt:
+      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimLong:
+      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
+      break;
+    case Primitive::kPrimNot: {
+      Object* reg = shadow_frame.GetVRegReference(vregA);
+      if (do_assignability_check && reg != nullptr) {
+        // FieldHelper::GetType can resolve classes; use a handle wrapper that will restore the
+        // object in the destructor.
+        Class* field_class;
+        {
+          StackHandleScope<3> hs(self);
+          HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+          HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
+          HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+          FieldHelper fh(h_f);
+          field_class = fh.GetType();
+        }
+        if (!reg->VerifierInstanceOf(field_class)) {
+          // This should never happen.
+          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
+                                   "Ljava/lang/VirtualMachineError;",
+                                   "Put '%s' that is not instance of field '%s' in '%s'",
+                                   reg->GetClass()->GetDescriptor().c_str(),
+                                   field_class->GetDescriptor().c_str(),
+                                   f->GetDeclaringClass()->GetDescriptor().c_str());
+          return false;
+        }
+      }
+      f->SetObj<transaction_active>(obj, reg);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoFieldPut functions.
+#define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active) \
+  template bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, \
+      const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+
+#define EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, false);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, false);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, true);  \
+    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, true);
+
+// iput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot);
+
+// sput-XXX
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong);
+EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot);
+
+#undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
+
+template<Primitive::Type field_type, bool transaction_active>
+bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  if (UNLIKELY(obj == nullptr)) {
+    // We lost the reference to the field index so we cannot get a more
+    // precise exception message.
+    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
+    return false;
+  }
+  MemberOffset field_offset(inst->VRegC_22c());
+  const uint32_t vregA = inst->VRegA_22c(inst_data);
+  // Report this field modification to instrumentation if needed. Since we only have the offset of
+  // the field from the base of the object, we need to look for it first.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
+    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
+                                                        field_offset.Uint32Value());
+    DCHECK(f != nullptr);
+    DCHECK(!f->IsStatic());
+    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
+    instrumentation->FieldWriteEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
+                                     shadow_frame.GetDexPC(), f, field_value);
+  }
+  // Note: iput-x-quick instructions are only for non-volatile fields.
+  switch (field_type) {
+    case Primitive::kPrimInt:
+      obj->SetField32<transaction_active>(field_offset, shadow_frame.GetVReg(vregA));
+      break;
+    case Primitive::kPrimLong:
+      obj->SetField64<transaction_active>(field_offset, shadow_frame.GetVRegLong(vregA));
+      break;
+    case Primitive::kPrimNot:
+      obj->SetFieldObject<transaction_active>(field_offset, shadow_frame.GetVRegReference(vregA));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+  }
+  return true;
+}
+
+// Explicitly instantiate all DoIPutQuick functions.
+#define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active) \
+  template bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
+                                                              const Instruction* inst, \
+                                                              uint16_t inst_data)
+
+#define EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(_field_type)   \
+  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, false);     \
+  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, true);
+
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt);    // iput-quick.
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong);   // iput-wide-quick.
+EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot);    // iput-object-quick.
+#undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL
+#undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
+
+/**
+ * Finds the location where this exception will be caught. We search until we reach either the top
+ * frame or a native frame, in which case the exception is considered uncaught.
+ */
+class CatchLocationFinder : public StackVisitor {
+ public:
+  explicit CatchLocationFinder(Thread* self, Handle<mirror::Throwable>* exception)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    : StackVisitor(self, nullptr), self_(self), handle_scope_(self), exception_(exception),
+      catch_method_(handle_scope_.NewHandle<mirror::ArtMethod>(nullptr)),
+      catch_dex_pc_(DexFile::kDexNoIndex), clear_exception_(false) {
+  }
+
+  bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* method = GetMethod();
+    if (method == nullptr) {
+      return true;
+    }
+    if (method->IsRuntimeMethod()) {
+      // Ignore callee save method.
+      DCHECK(method->IsCalleeSaveMethod());
+      return true;
+    }
+    if (method->IsNative()) {
+      return false;  // End stack walk.
+    }
+    DCHECK(!method->IsNative());
+    uint32_t dex_pc = GetDexPc();
+    if (dex_pc != DexFile::kDexNoIndex) {
+      uint32_t found_dex_pc;
+      {
+        StackHandleScope<3> hs(self_);
+        Handle<mirror::Class> exception_class(hs.NewHandle((*exception_)->GetClass()));
+        Handle<mirror::ArtMethod> h_method(hs.NewHandle(method));
+        found_dex_pc = mirror::ArtMethod::FindCatchBlock(h_method, exception_class, dex_pc,
+                                                         &clear_exception_);
+      }
+      if (found_dex_pc != DexFile::kDexNoIndex) {
+        catch_method_.Assign(method);
+        catch_dex_pc_ = found_dex_pc;
+        return false;  // End stack walk.
+      }
+    }
+    return true;  // Continue stack walk.
+  }
+
+  ArtMethod* GetCatchMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return catch_method_.Get();
+  }
+
+  uint32_t GetCatchDexPc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return catch_dex_pc_;
+  }
+
+  bool NeedClearException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return clear_exception_;
+  }
+
+ private:
+  Thread* const self_;
+  StackHandleScope<1> handle_scope_;
+  Handle<mirror::Throwable>* exception_;
+  Handle<mirror::ArtMethod> catch_method_;
+  uint32_t catch_dex_pc_;
+  bool clear_exception_;
+
+
+  DISALLOW_COPY_AND_ASSIGN(CatchLocationFinder);
+};
+
+uint32_t FindNextInstructionFollowingException(Thread* self,
+                                               ShadowFrame& shadow_frame,
+                                               uint32_t dex_pc,
+                                               const instrumentation::Instrumentation* instrumentation) {
+  self->VerifyStack();
+  ThrowLocation throw_location;
+  StackHandleScope<3> hs(self);
+  Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException(&throw_location)));
+  if (!self->IsExceptionReportedToInstrumentation() && instrumentation->HasExceptionCaughtListeners()) {
+    CatchLocationFinder clf(self, &exception);
+    clf.WalkStack(false);
+    instrumentation->ExceptionCaughtEvent(self, throw_location, clf.GetCatchMethod(),
+                                          clf.GetCatchDexPc(), exception.Get());
+    self->SetExceptionReportedToInstrumentation(true);
+  }
+  bool clear_exception = false;
+  uint32_t found_dex_pc;
+  {
+    Handle<mirror::Class> exception_class(hs.NewHandle(exception->GetClass()));
+    Handle<mirror::ArtMethod> h_method(hs.NewHandle(shadow_frame.GetMethod()));
+    found_dex_pc = mirror::ArtMethod::FindCatchBlock(h_method, exception_class, dex_pc,
+                                                     &clear_exception);
+  }
+  if (found_dex_pc == DexFile::kDexNoIndex) {
+    instrumentation->MethodUnwindEvent(self, shadow_frame.GetThisObject(),
+                                       shadow_frame.GetMethod(), dex_pc);
+  } else {
+    if (self->IsExceptionReportedToInstrumentation()) {
+      instrumentation->MethodUnwindEvent(self, shadow_frame.GetThisObject(),
+                                         shadow_frame.GetMethod(), dex_pc);
+    }
+    if (clear_exception) {
+      self->ClearException();
+    }
+  }
+  return found_dex_pc;
+}
+
+void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
+  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(mh.GetMethod()->GetDexFile());
+  exit(0);  // Unreachable, keep GCC happy.
+}
+
 static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
                                    const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
                                    JValue* result, size_t arg_offset)
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 88a8468..5277330 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -38,6 +38,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "mirror/string-inl.h"
 #include "object_utils.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
@@ -60,15 +61,6 @@
 using ::art::mirror::String;
 using ::art::mirror::Throwable;
 
-// b/14882674 Workaround stack overflow issue with clang
-#if defined(__clang__) && defined(__aarch64__)
-#define SOMETIMES_INLINE __attribute__((noinline))
-#define SOMETIMES_INLINE_KEYWORD
-#else
-#define SOMETIMES_INLINE ALWAYS_INLINE
-#define SOMETIMES_INLINE_KEYWORD inline
-#endif
-
 namespace art {
 namespace interpreter {
 
@@ -84,16 +76,8 @@
                               const DexFile::CodeItem* code_item,
                               ShadowFrame& shadow_frame, JValue result_register);
 
-// Workaround for b/14882674 where clang allocates stack for each ThrowLocation created by calls to
-// ShadowFrame::GetCurrentLocationForThrow(). Moving the call here prevents from doing such
-// allocation in the interpreter itself.
-static inline void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
-
-static inline void ThrowNullPointerExceptionFromInterpreter(
-    const ShadowFrame& shadow_frame) {
-  ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-}
+void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
   ref->MonitorEnter(self);
@@ -174,266 +158,28 @@
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static SOMETIMES_INLINE_KEYWORD bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                                                const Instruction* inst, uint16_t inst_data) {
-  const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
-  const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::FieldSize(field_type));
-  if (UNLIKELY(f == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(), f, true);
-      return false;
-    }
-  }
-  // Report this field access to instrumentation if needed.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
-    Object* this_object = f->IsStatic() ? nullptr : obj;
-    instrumentation->FieldReadEvent(self, this_object, shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(), f);
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
-      break;
-    case Primitive::kPrimByte:
-      shadow_frame.SetVReg(vregA, f->GetByte(obj));
-      break;
-    case Primitive::kPrimChar:
-      shadow_frame.SetVReg(vregA, f->GetChar(obj));
-      break;
-    case Primitive::kPrimShort:
-      shadow_frame.SetVReg(vregA, f->GetShort(obj));
-      break;
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, f->GetInt(obj));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, f->GetObject(obj));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type>
-static SOMETIMES_INLINE_KEYWORD bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-  if (UNLIKELY(obj == nullptr)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precised exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  // Report this field access to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
-    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
-                                                        field_offset.Uint32Value());
-    DCHECK(f != nullptr);
-    DCHECK(!f->IsStatic());
-    instrumentation->FieldReadEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(), f);
-  }
-  // Note: iget-x-quick instructions are only for non-volatile fields.
-  const uint32_t vregA = inst->VRegA_22c(inst_data);
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, static_cast<int32_t>(obj->GetField32(field_offset)));
-      break;
-    case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset)));
-      break;
-    case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
-
-template<Primitive::Type field_type>
-static inline JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  JValue field_value;
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimByte:
-      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimChar:
-      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimShort:
-      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
-      break;
-    case Primitive::kPrimInt:
-      field_value.SetI(shadow_frame.GetVReg(vreg));
-      break;
-    case Primitive::kPrimLong:
-      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
-      break;
-    case Primitive::kPrimNot:
-      field_value.SetL(shadow_frame.GetVRegReference(vreg));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-      break;
-  }
-  return field_value;
-}
+bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iput-XXX and sput-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check, bool transaction_active>
-static SOMETIMES_INLINE_KEYWORD bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                                                const Instruction* inst, uint16_t inst_data) {
-  bool do_assignability_check = do_access_check;
-  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
-  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::FieldSize(field_type));
-  if (UNLIKELY(f == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  Object* obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(shadow_frame.GetCurrentLocationForThrow(),
-                                              f, false);
-      return false;
-    }
-  }
-  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
-  // Report this field access to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    Object* this_object = f->IsStatic() ? nullptr : obj;
-    instrumentation->FieldWriteEvent(self, this_object, shadow_frame.GetMethod(),
-                                     shadow_frame.GetDexPC(), f, field_value);
-  }
-  switch (field_type) {
-    case Primitive::kPrimBoolean:
-      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimByte:
-      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimChar:
-      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimShort:
-      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimInt:
-      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimLong:
-      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
-      break;
-    case Primitive::kPrimNot: {
-      Object* reg = shadow_frame.GetVRegReference(vregA);
-      if (do_assignability_check && reg != nullptr) {
-        // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
-        // object in the destructor.
-        Class* field_class;
-        {
-          StackHandleScope<3> hs(self);
-          HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
-          HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
-          HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-          FieldHelper fh(h_f);
-          field_class = fh.GetType();
-        }
-        if (!reg->VerifierInstanceOf(field_class)) {
-          // This should never happen.
-          self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
-                                   "Ljava/lang/VirtualMachineError;",
-                                   "Put '%s' that is not instance of field '%s' in '%s'",
-                                   reg->GetClass()->GetDescriptor().c_str(),
-                                   field_class->GetDescriptor().c_str(),
-                                   f->GetDeclaringClass()->GetDescriptor().c_str());
-          return false;
-        }
-      }
-      f->SetObj<transaction_active>(obj, reg);
-      break;
-    }
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
+bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type, bool transaction_active>
-static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame,
-                                                 const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-  if (UNLIKELY(obj == nullptr)) {
-    // We lost the reference to the field index so we cannot get a more
-    // precised exception message.
-    ThrowNullPointerExceptionFromDexPC(shadow_frame.GetCurrentLocationForThrow());
-    return false;
-  }
-  MemberOffset field_offset(inst->VRegC_22c());
-  const uint32_t vregA = inst->VRegA_22c(inst_data);
-  // Report this field modification to instrumentation if needed. Since we only have the offset of
-  // the field from the base of the object, we need to look for it first.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldWriteListeners())) {
-    ArtField* f = ArtField::FindInstanceFieldWithOffset(obj->GetClass(),
-                                                        field_offset.Uint32Value());
-    DCHECK(f != nullptr);
-    DCHECK(!f->IsStatic());
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    instrumentation->FieldWriteEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                     shadow_frame.GetDexPC(), f, field_value);
-  }
-  // Note: iput-x-quick instructions are only for non-volatile fields.
-  switch (field_type) {
-    case Primitive::kPrimInt:
-      obj->SetField32<transaction_active>(field_offset, shadow_frame.GetVReg(vregA));
-      break;
-    case Primitive::kPrimLong:
-      obj->SetField64<transaction_active>(field_offset, shadow_frame.GetVRegLong(vregA));
-      break;
-    case Primitive::kPrimNot:
-      obj->SetFieldObject<transaction_active>(field_offset, shadow_frame.GetVRegReference(vregA));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable: " << field_type;
-  }
-  return true;
-}
+bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 
 // Handles string resolution for const-string and const-string-jumbo instructions. Also ensures the
 // java.lang.String class is initialized.
@@ -588,54 +334,14 @@
   return 3;
 }
 
-static inline uint32_t FindNextInstructionFollowingException(Thread* self,
-                                                             ShadowFrame& shadow_frame,
-                                                             uint32_t dex_pc,
-                                                             mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation)
-SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
+uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
+    uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline uint32_t FindNextInstructionFollowingException(Thread* self,
-                                                             ShadowFrame& shadow_frame,
-                                                             uint32_t dex_pc,
-                                                             mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation) {
-  self->VerifyStack();
-  ThrowLocation throw_location;
-  mirror::Throwable* exception = self->GetException(&throw_location);
-  bool clear_exception = false;
-  uint32_t found_dex_pc;
-  {
-    StackHandleScope<3> hs(self);
-    Handle<mirror::Class> exception_class(hs.NewHandle(exception->GetClass()));
-    Handle<mirror::ArtMethod> h_method(hs.NewHandle(shadow_frame.GetMethod()));
-    HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&this_object));
-    found_dex_pc = mirror::ArtMethod::FindCatchBlock(h_method, exception_class, dex_pc,
-                                                     &clear_exception);
-  }
-  if (found_dex_pc == DexFile::kDexNoIndex) {
-    instrumentation->MethodUnwindEvent(self, this_object,
-                                       shadow_frame.GetMethod(), dex_pc);
-  } else {
-    instrumentation->ExceptionCaughtEvent(self, throw_location,
-                                          shadow_frame.GetMethod(),
-                                          found_dex_pc, exception);
-    if (clear_exception) {
-      self->ClearException();
-    }
-  }
-  return found_dex_pc;
-}
-
-static inline void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
   __attribute__((cold, noreturn))
   SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
-  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(mh.GetMethod()->GetDexFile());
-  exit(0);  // Unreachable, keep GCC happy.
-}
-
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc, MethodHelper& mh)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -670,7 +376,7 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                    \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                     \
   bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
@@ -689,73 +395,9 @@
 #undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL
 
-// Explicitly instantiate all DoFieldGet functions.
-#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check)                \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                        \
-  bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, ShadowFrame& shadow_frame, \
-                                                      const Instruction* inst, uint16_t inst_data)
-
-#define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
-
-// iget-XXX
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstanceObjectRead, Primitive::kPrimNot);
-
-// sget-XXX
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticPrimitiveRead, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(StaticObjectRead, Primitive::kPrimNot);
-
-#undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
-
-// Explicitly instantiate all DoFieldPut functions.
-#define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                                                   \
-  bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, const ShadowFrame& shadow_frame, \
-                                                                           const Instruction* inst, uint16_t inst_data)
-
-#define EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, false);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, false);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, false, true);  \
-    EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, true, true);
-
-// iput-XXX
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstancePrimitiveWrite, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(InstanceObjectWrite, Primitive::kPrimNot);
-
-// sput-XXX
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimBoolean);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimByte);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimChar);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimShort);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimInt);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticPrimitiveWrite, Primitive::kPrimLong);
-EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL(StaticObjectWrite, Primitive::kPrimNot);
-
-#undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
-
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                               \
   bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,      \
                                        const Instruction* inst, uint16_t inst_data,  \
                                        JValue* result)
@@ -764,33 +406,6 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
-// Explicitly instantiate all DoIGetQuick functions.
-#define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type)                            \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
-  bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst,  \
-                                uint16_t inst_data)
-
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
-EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
-#undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
-
-// Explicitly instantiate all DoIPutQuick functions.
-#define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active)        \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE               \
-  bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
-                                                     const Instruction* inst,         \
-                                                     uint16_t inst_data)
-
-#define EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(_field_type)   \
-  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, false);     \
-  EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, true);
-
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimInt);    // iget-quick.
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimLong);   // iget-wide-quick.
-EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL(Primitive::kPrimNot);    // iget-object-quick.
-#undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL
-#undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
 
 }  // namespace interpreter
 }  // namespace art
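
As a side note, after dropping the SOMETIMES_INLINE hint the surviving instantiation macro expands to plain explicit template instantiations. A purely illustrative expansion for the two DoInvokeVirtualQuick variants (the real declarations and the SHARED_LOCKS_REQUIRED annotation come from the ART interpreter headers):

    // Approximate expansion of EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(false)
    // and EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true) after this change.
    template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
    bool DoInvokeVirtualQuick<false>(Thread* self, ShadowFrame& shadow_frame,
                                     const Instruction* inst, uint16_t inst_data,
                                     JValue* result);  // invoke-virtual-quick.
    template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
    bool DoInvokeVirtualQuick<true>(Thread* self, ShadowFrame& shadow_frame,
                                    const Instruction* inst, uint16_t inst_data,
                                    JValue* result);   // invoke-virtual-quick-range.
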
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 19673ac..cb4868c 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -2391,10 +2391,8 @@
       CheckSuspend(self);
       UPDATE_HANDLER_TABLE();
     }
-    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
-                                                                  this_object,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
       return JValue(); /* Handled in caller. */
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index a43fad3..bdf2a20 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -25,10 +25,8 @@
     if (UNLIKELY(self->TestAllFlags())) {                                                       \
       CheckSuspend(self);                                                                       \
     }                                                                                           \
-    Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                     \
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
                                                                   inst->GetDexPc(insns),        \
-                                                                  this_object,                  \
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       return JValue(); /* Handled in caller. */                                                 \
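
Both the goto-table and the switch interpreter now locate the next instruction after an exception without materializing the receiver object. Unfolded from the exception-handling macro and following the switch interpreter's variant, the call shape reduces to roughly the following (the exact declaration of FindNextInstructionFollowingException lives in interpreter_common.h and is not part of this diff):

    // Post-change call shape shared by interpreter_goto_table_impl.cc and
    // interpreter_switch_impl.cc (sketch, with the macro line continuations removed).
    if (UNLIKELY(self->TestAllFlags())) {
      CheckSuspend(self);
    }
    uint32_t found_dex_pc = FindNextInstructionFollowingException(
        self, shadow_frame, inst->GetDexPc(insns), instrumentation);
    if (found_dex_pc == DexFile::kDexNoIndex) {
      return JValue();  // Handled in caller.
    }
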
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 19ee1ff..66406bf 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -682,6 +682,7 @@
     auto old_throw_method(hs.NewHandle<mirror::ArtMethod>(nullptr));
     auto old_exception(hs.NewHandle<mirror::Throwable>(nullptr));
     uint32_t old_throw_dex_pc;
+    bool old_is_exception_reported;
     {
       ThrowLocation old_throw_location;
       mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
@@ -689,6 +690,7 @@
       old_throw_method.Assign(old_throw_location.GetMethod());
       old_exception.Assign(old_exception_obj);
       old_throw_dex_pc = old_throw_location.GetDexPc();
+      old_is_exception_reported = soa.Self()->IsExceptionReportedToInstrumentation();
       soa.Self()->ClearException();
     }
     ScopedLocalRef<jthrowable> exception(env,
@@ -710,6 +712,7 @@
                                          old_throw_dex_pc);
 
     soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
+    soa.Self()->SetExceptionReportedToInstrumentation(old_is_exception_reported);
   }
 
   static jthrowable ExceptionOccurred(JNIEnv* env) {
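
This is the first of several call sites (mirror/class.cc, mirror/art_method.cc and thread.cc below follow the same shape) that snapshot the new per-thread "exception reported to instrumentation" bit before temporarily clearing an exception, and restore it together with the exception. Stripped of the handle-scope plumbing, and assuming self is the current Thread*, the pattern is roughly:

    // Save/clear/restore pattern introduced by this patch (sketch; the real code
    // keeps the Throwable alive via a StackHandleScope, omitted here).
    ThrowLocation old_throw_location;
    mirror::Throwable* old_exception = self->GetException(&old_throw_location);
    bool old_is_exception_reported = self->IsExceptionReportedToInstrumentation();
    self->ClearException();  // Also resets the reported flag (see the thread.h hunk below).

    // ... work that must run without a pending exception ...

    self->SetException(old_throw_location, old_exception);
    self->SetExceptionReportedToInstrumentation(old_is_exception_reported);
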
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 22a61a2..81a8623 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -473,6 +473,18 @@
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot);
 }
 
+void MemMap::MadviseDontNeedAndZero() {
+  if (base_begin_ != nullptr || base_size_ != 0) {
+    if (!kMadviseZeroes) {
+      memset(base_begin_, 0, base_size_);
+    }
+    int result = madvise(base_begin_, base_size_, MADV_DONTNEED);
+    if (result == -1) {
+      PLOG(WARNING) << "madvise failed";
+    }
+  }
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
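
MadviseDontNeedAndZero returns the physical pages behind a mapping to the kernel while keeping the reservation itself: on Linux an anonymous private mapping already reads back as zeros after MADV_DONTNEED, so the explicit memset only runs where kMadviseZeroes is false (see mem_map.h below). A minimal usage sketch, assuming map is a valid anonymous MemMap* owned by the caller:

    // Minimal usage sketch: release the memory of an anonymous MemMap without unmapping it.
    void ReleasePages(MemMap* map) {
      // Afterwards the range is still mapped and reads as zero, but its physical
      // pages have been handed back to the kernel.
      map->MadviseDontNeedAndZero();
    }
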
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index dc5909b..e42251c 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -30,6 +30,12 @@
 
 namespace art {
 
+#ifdef __linux__
+static constexpr bool kMadviseZeroes = true;
+#else
+static constexpr bool kMadviseZeroes = false;
+#endif
+
 // Used to keep track of mmap segments.
 //
 // On 64b systems not supporting MAP_32BIT, the implementation of MemMap will do a linear scan
@@ -77,6 +83,8 @@
 
   bool Protect(int prot);
 
+  void MadviseDontNeedAndZero();
+
   int GetProtect() const {
     return prot_;
   }
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 3db4be3..4821e29 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -240,6 +240,7 @@
   ThrowLocation throw_location;
   StackHandleScope<1> hs(self);
   Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException(&throw_location)));
+  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
   self->ClearException();
   // Default to handler not found.
   uint32_t found_dex_pc = DexFile::kDexNoIndex;
@@ -276,6 +277,7 @@
   // Put the exception back.
   if (exception.Get() != nullptr) {
     self->SetException(throw_location, exception.Get());
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   return found_dex_pc;
 }
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 7b31a82..a20f7b9 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -84,7 +84,7 @@
     Handle<mirror::Object> old_throw_this_object(hs.NewHandle(old_throw_location.GetThis()));
     Handle<mirror::ArtMethod> old_throw_method(hs.NewHandle(old_throw_location.GetMethod()));
     uint32_t old_throw_dex_pc = old_throw_location.GetDexPc();
-
+    bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
     // clear exception to call FindSystemClass
     self->ClearException();
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -102,8 +102,8 @@
     // Restore exception.
     ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
-
     self->SetException(gc_safe_throw_location, old_exception.Get());
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   COMPILE_ASSERT(sizeof(Status) == sizeof(uint32_t), size_of_status_not_uint32);
   if (Runtime::Current()->IsActiveTransaction()) {
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 1d79106..5c57dce 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -16,6 +16,7 @@
 
 #include "string-inl.h"
 
+#include "arch/memcmp16.h"
 #include "array.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -206,21 +207,6 @@
   return result;
 }
 
-#ifdef HAVE__MEMCMP16
-// "count" is in 16-bit units.
-extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
-#define MemCmp16 __memcmp16
-#else
-static uint32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
-  for (size_t i = 0; i < count; i++) {
-    if (s0[i] != s1[i]) {
-      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
-    }
-  }
-  return 0;
-}
-#endif
-
 int32_t String::CompareTo(String* rhs) {
   // Quick test for comparison of a string with itself.
   String* lhs = this;
@@ -233,13 +219,13 @@
   // *without* sign extension before it subtracts them (which makes some
   // sense since "char" is unsigned).  So what we get is the result of
   // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
-  int lhsCount = lhs->GetLength();
-  int rhsCount = rhs->GetLength();
-  int countDiff = lhsCount - rhsCount;
-  int minCount = (countDiff < 0) ? lhsCount : rhsCount;
+  int32_t lhsCount = lhs->GetLength();
+  int32_t rhsCount = rhs->GetLength();
+  int32_t countDiff = lhsCount - rhsCount;
+  int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
   const uint16_t* lhsChars = lhs->GetCharArray()->GetData() + lhs->GetOffset();
   const uint16_t* rhsChars = rhs->GetCharArray()->GetData() + rhs->GetOffset();
-  int otherRes = MemCmp16(lhsChars, rhsChars, minCount);
+  int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
   if (otherRes != 0) {
     return otherRes;
   }
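
String::CompareTo now picks up MemCmp16 from the shared arch/memcmp16.h header instead of declaring it locally. Up to the cleaned-up return type, the generic fallback that the removed block provided is the function below ("count" is in 16-bit units; the per-architecture __memcmp16 assembly is what the new header selects when available):

    // Generic 16-bit-unit comparison, equivalent to the fallback removed above.
    static int32_t MemCmp16Generic(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }
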
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 2c24e33..7e3810c 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -17,7 +17,11 @@
 #include <algorithm>
 #include <set>
 #include <fcntl.h>
+#ifdef __linux__
 #include <sys/sendfile.h>
+#else
+#include <sys/socket.h>
+#endif
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -241,7 +245,12 @@
     return;
   }
 
+#ifdef __linux__
   if (sendfile(dst.get(), src.get(), nullptr, stat_src.st_size) == -1) {
+#else
+  off_t len;
+  if (sendfile(dst.get(), src.get(), 0, &len, nullptr, 0) == -1) {
+#endif
     PLOG(ERROR) << "Failed to copy profile file " << oldfile << " to " << newfile
       << ". My uid:gid is " << getuid() << ":" << getgid();
   }
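
The #ifdef around the profile copy exists because sendfile() has a different prototype on Linux than on Darwin/BSD: Linux takes an in/out offset pointer plus a byte count, while the Darwin form takes an offset value, an in/out length pointer, an optional header/trailer descriptor and flags (prototypes paraphrased from the respective man pages). A hedged wrapper mirroring the two calls above, with the argument order kept exactly as in the patch and error handling reduced to a boolean:

    // Portability wrapper mirroring the two sendfile() calls above (illustrative only;
    // needs <sys/sendfile.h> on Linux and <sys/socket.h> elsewhere, as included above).
    static bool CopyWithSendfile(int dst_fd, int src_fd, size_t byte_count) {
    #ifdef __linux__
      // Linux: ssize_t sendfile(int out_fd, int in_fd, off_t* offset, size_t count);
      return sendfile(dst_fd, src_fd, nullptr, byte_count) != -1;
    #else
      // Darwin/BSD: int sendfile(int fd, int s, off_t offset, off_t* len,
      //                          struct sf_hdtr* hdtr, int flags);
      off_t len = 0;  // 0 requests "send until end of file"; holds bytes sent on return.
      (void)byte_count;
      return sendfile(dst_fd, src_fd, 0, &len, nullptr, 0) != -1;
    #endif
    }
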
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 7490e6a..820bd04 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -32,9 +32,11 @@
 static void EnableDebugger() {
   // To let a non-privileged gdbserver attach to this
   // process, we must set our dumpable flag.
+#if defined(HAVE_PRCTL)
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+#endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
   rl.rlim_cur = 0;
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 96834b8..f4721f2 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '3', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '5', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index e3f9afc..1034923 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -110,7 +110,8 @@
 };
 
 void QuickExceptionHandler::FindCatch(const ThrowLocation& throw_location,
-                                      mirror::Throwable* exception) {
+                                      mirror::Throwable* exception,
+                                      bool is_exception_reported) {
   DCHECK(!is_deoptimization_);
   if (kDebugExceptionDelivery) {
     mirror::String* msg = exception->GetDetailMessage();
@@ -141,12 +142,24 @@
   } else {
     // Put exception back in root set with clear throw location.
     self_->SetException(ThrowLocation(), exception_ref.Get());
+    self_->SetExceptionReportedToInstrumentation(is_exception_reported);
   }
   // The debugger may suspend this thread and walk its stack. Let's do this before popping
   // instrumentation frames.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  instrumentation->ExceptionCaughtEvent(self_, throw_location, handler_method_, handler_dex_pc_,
-                                        exception_ref.Get());
+  if (!is_exception_reported) {
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    instrumentation->ExceptionCaughtEvent(self_, throw_location, handler_method_, handler_dex_pc_,
+                                          exception_ref.Get());
+    // Even if we do not catch this exception here, remember that it has already been reported
+    // above so that it is not reported a second time.
+    self_->SetExceptionReportedToInstrumentation(true);
+  }
+  bool caught_exception = (handler_method_ != nullptr && handler_dex_pc_ != DexFile::kDexNoIndex);
+  if (caught_exception) {
+    // We're catching this exception so we finish reporting it. We do it here to avoid doing it
+    // in the compiled code.
+    self_->SetExceptionReportedToInstrumentation(false);
+  }
 }
 
 // Prepares deoptimization.
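
Condensed, the flag handling FindCatch now performs is: fire ExceptionCaughtEvent at most once per exception, and clear the mark again once a handler has actually been found so the next exception starts with a clean flag. A restatement of just that logic, using the same members and locals as above:

    // Report-once protocol added to FindCatch (restated without the surrounding
    // handler search; exception_ref, handler_method_ and handler_dex_pc_ are as above).
    if (!is_exception_reported) {
      Runtime::Current()->GetInstrumentation()->ExceptionCaughtEvent(
          self_, throw_location, handler_method_, handler_dex_pc_, exception_ref.Get());
      self_->SetExceptionReportedToInstrumentation(true);  // Never report the same exception twice.
    }
    if (handler_method_ != nullptr && handler_dex_pc_ != DexFile::kDexNoIndex) {
      // A handler was found: the exception is consumed here, so reset the flag now
      // rather than in compiled code.
      self_->SetExceptionReportedToInstrumentation(false);
    }
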
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index a4229b3..1d600ed 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -42,7 +42,8 @@
     LOG(FATAL) << "UNREACHABLE";  // Expected to take long jump.
   }
 
-  void FindCatch(const ThrowLocation& throw_location, mirror::Throwable* exception)
+  void FindCatch(const ThrowLocation& throw_location, mirror::Throwable* exception,
+                 bool is_exception_reported)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DeoptimizeStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UpdateInstrumentationStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 89058c8..bcb4eb3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -18,7 +18,9 @@
 
 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
 #include <sys/mount.h>
+#ifdef __linux__
 #include <linux/fs.h>
+#endif
 
 #include <signal.h>
 #include <sys/syscall.h>
@@ -437,6 +439,7 @@
 
 // Do zygote-mode-only initialization.
 bool Runtime::InitZygote() {
+#ifdef __linux__
   // zygote goes into its own process group
   setpgid(0, 0);
 
@@ -467,6 +470,10 @@
   }
 
   return true;
+#else
+  UNIMPLEMENTED(FATAL);
+  return false;
+#endif
 }
 
 void Runtime::DidForkFromZygote() {
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 960d332..46ee274 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -327,7 +327,7 @@
     while (true) {
     }
   }
-
+#ifdef __linux__
   // Remove our signal handler for this signal...
   struct sigaction action;
   memset(&action, 0, sizeof(action));
@@ -336,6 +336,9 @@
   sigaction(signal_number, &action, NULL);
   // ...and re-raise so we die with the appropriate status.
   kill(getpid(), signal_number);
+#else
+  exit(EXIT_FAILURE);
+#endif
 }
 
 void Runtime::InitPlatformSignalHandlers() {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 22f0e80..6980530 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -495,7 +495,9 @@
   }
 
   // TODO: move this into the Linux GetThreadStack implementation.
-#if !defined(__APPLE__)
+#if defined(__APPLE__)
+  bool is_main_thread = false;
+#else
   // If we're the main thread, check whether we were run with an unlimited stack. In that case,
   // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
   // will be broken because we'll die long before we get close to 2GB.
@@ -1611,6 +1613,7 @@
   Handle<mirror::ArtMethod> saved_throw_method(hs.NewHandle(throw_location.GetMethod()));
   // Ignore the cause throw location. TODO: should we report this as a re-throw?
   ScopedLocalRef<jobject> cause(GetJniEnv(), soa.AddLocalReference<jobject>(GetException(nullptr)));
+  bool is_exception_reported = IsExceptionReportedToInstrumentation();
   ClearException();
   Runtime* runtime = Runtime::Current();
 
@@ -1641,6 +1644,7 @@
     ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    SetExceptionReportedToInstrumentation(is_exception_reported);
     return;
   }
 
@@ -1693,6 +1697,7 @@
     ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, exception.Get());
+    SetExceptionReportedToInstrumentation(is_exception_reported);
   } else {
     jvalue jv_args[2];
     size_t i = 0;
@@ -1710,6 +1715,7 @@
       ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                            throw_location.GetDexPc());
       SetException(gc_safe_throw_location, exception.Get());
+      SetExceptionReportedToInstrumentation(is_exception_reported);
     }
   }
 }
@@ -1862,7 +1868,6 @@
   QUICK_ENTRY_POINT_INFO(pShrLong)
   QUICK_ENTRY_POINT_INFO(pUshrLong)
   QUICK_ENTRY_POINT_INFO(pIndexOf)
-  QUICK_ENTRY_POINT_INFO(pMemcmp16)
   QUICK_ENTRY_POINT_INFO(pStringCompareTo)
   QUICK_ENTRY_POINT_INFO(pMemcpy)
   QUICK_ENTRY_POINT_INFO(pQuickImtConflictTrampoline)
@@ -1893,13 +1898,14 @@
   CHECK(exception != nullptr);
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
+  bool is_exception_reported = IsExceptionReportedToInstrumentation();
   ClearException();
   bool is_deoptimization = (exception == GetDeoptimizationException());
   QuickExceptionHandler exception_handler(this, is_deoptimization);
   if (is_deoptimization) {
     exception_handler.DeoptimizeStack();
   } else {
-    exception_handler.FindCatch(throw_location, exception);
+    exception_handler.FindCatch(throw_location, exception, is_exception_reported);
   }
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
diff --git a/runtime/thread.h b/runtime/thread.h
index 5de54b3..bff9b52 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -329,6 +329,7 @@
   void ClearException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
     tlsPtr_.throw_location.Clear();
+    SetExceptionReportedToInstrumentation(false);
   }
 
   // Find catch block and perform long jump to appropriate exception handle
@@ -809,6 +810,14 @@
     tlsPtr_.rosalloc_runs[index] = run;
   }
 
+  bool IsExceptionReportedToInstrumentation() const {
+    return tls32_.is_exception_reported_to_instrumentation_;
+  }
+
+  void SetExceptionReportedToInstrumentation(bool reported) {
+    tls32_.is_exception_reported_to_instrumentation_ = reported;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -911,7 +920,7 @@
     explicit tls_32bit_sized_values(bool is_daemon) :
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
-      thread_exit_check_count(0) {
+      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -947,6 +956,10 @@
 
     // How many times has our pthread key's destructor been called?
     uint32_t thread_exit_check_count;
+
+    // When true, this field indicates that the exception associated with this thread has already
+    // been reported to instrumentation.
+    bool32_t is_exception_reported_to_instrumentation_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
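
Because ClearException() now also resets this flag, any code that clears an exception only temporarily has to restore the flag together with the exception, as the call sites patched above do. That discipline could be packaged in a small RAII helper on top of the new accessors; the class below is purely hypothetical (it is not part of this change) and ignores GC safety of the saved Throwable* to stay short:

    // Hypothetical RAII helper (not part of this patch) wrapping the save/clear/restore
    // discipline around the new accessors.
    class ScopedExceptionStash {
     public:
      explicit ScopedExceptionStash(Thread* self) : self_(self) {
        exception_ = self_->GetException(&throw_location_);
        was_reported_ = self_->IsExceptionReportedToInstrumentation();
        self_->ClearException();  // Resets the reported flag as well.
      }
      ~ScopedExceptionStash() {
        if (exception_ != nullptr) {
          self_->SetException(throw_location_, exception_);
          self_->SetExceptionReportedToInstrumentation(was_reported_);
        }
      }
     private:
      Thread* const self_;
      ThrowLocation throw_location_;
      mirror::Throwable* exception_;
      bool was_reported_;
    };
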
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 7700658..f60f795 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1054,6 +1054,7 @@
   if (current_method != nullptr) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
+#ifdef __linux__
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
   if (!backtrace->Unwind(0)) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
@@ -1095,6 +1096,7 @@
     }
     os << "\n";
   }
+#endif
 }
 
 #if defined(__APPLE__)
diff --git a/test/Android.mk b/test/Android.mk
index c15259c..7897449 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -143,8 +143,8 @@
 	mkdir -p /tmp/android-data/test-art-host-oat-default-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-default-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
-	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  LD_LIBRARY_PATH=$(HOST_LIBRARY_PATH) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_LIBRARY_PATH) $(1) $(2) \
           && echo test-art-host-oat-default-$(1) PASSED || (echo test-art-host-oat-default-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-default-$(1)
 
@@ -153,8 +153,8 @@
 	mkdir -p /tmp/android-data/test-art-host-oat-interpreter-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-interpreter-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
-	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  LD_LIBRARY_PATH=$(HOST_LIBRARY_PATH) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_LIBRARY_PATH) $(1) $(2) \
           && echo test-art-host-oat-interpreter-$(1) PASSED || (echo test-art-host-oat-interpreter-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-interpreter-$(1)
 
diff --git a/test/SignalTest/signaltest.cc b/test/SignalTest/signaltest.cc
index b84e395..dfe3197 100644
--- a/test/SignalTest/signaltest.cc
+++ b/test/SignalTest/signaltest.cc
@@ -46,7 +46,7 @@
   action.sa_sigaction = signalhandler;
   sigemptyset(&action.sa_mask);
   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__mips__)
+#if !defined(__APPLE__) && !defined(__mips__)
   action.sa_restorer = nullptr;
 #endif
 
diff --git a/tools/Android.mk b/tools/Android.mk
index 6c385dc..d3be17f 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -16,7 +16,6 @@
 
 LOCAL_PATH := $(call my-dir)
 
-ifeq ($(WITH_HOST_DALVIK),true)
 # Copy the art shell script to the host's bin directory
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
@@ -28,5 +27,3 @@
 	@echo "Copy: $(PRIVATE_MODULE) ($@)"
 	$(copy-file-to-new-target)
 	$(hide) chmod 755 $@
-
-endif