Merge "Optimizing: Better invoke-static/-direct dispatch."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 7550f50..ad2feeb 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -227,6 +227,59 @@
   -fvisibility=protected \
   $(art_default_gc_type_cflags)
 
+# The architectures for which the compiler tools are able to generate code. Setting this to 'all'
+# will cause all architectures to be included.
+ART_TARGET_CODEGEN_ARCHS ?= all
+ART_HOST_CODEGEN_ARCHS ?= all
+
+ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
+  ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
+  # We need to handle the fact that some compiler tests mix code from different architectures.
+  ART_TARGET_COMPILER_TESTS ?= true
+else
+  ART_TARGET_COMPILER_TESTS := false
+  ifeq ($(ART_TARGET_CODEGEN_ARCHS),svelte)
+    ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_ARCH_64) $(ART_TARGET_ARCH_32))
+  endif
+endif
+ifeq ($(ART_HOST_CODEGEN_ARCHS),all)
+  ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
+  ART_HOST_COMPILER_TESTS ?= true
+else
+  ART_HOST_COMPILER_TESTS := false
+  ifeq ($(ART_HOST_CODEGEN_ARCHS),svelte)
+    ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS) $(ART_HOST_ARCH_64) $(ART_HOST_ARCH_32))
+  endif
+endif
+
+ifneq (,$(filter arm64,$(ART_TARGET_CODEGEN_ARCHS)))
+  ART_TARGET_CODEGEN_ARCHS += arm
+endif
+ifneq (,$(filter mips64,$(ART_TARGET_CODEGEN_ARCHS)))
+  ART_TARGET_CODEGEN_ARCHS += mips
+endif
+ifneq (,$(filter x86_64,$(ART_TARGET_CODEGEN_ARCHS)))
+  ART_TARGET_CODEGEN_ARCHS += x86
+endif
+ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS))
+ifneq (,$(filter arm64,$(ART_HOST_CODEGEN_ARCHS)))
+  ART_HOST_CODEGEN_ARCHS += arm
+endif
+ifneq (,$(filter mips64,$(ART_HOST_CODEGEN_ARCHS)))
+  ART_HOST_CODEGEN_ARCHS += mips
+endif
+ifneq (,$(filter x86_64,$(ART_HOST_CODEGEN_ARCHS)))
+  ART_HOST_CODEGEN_ARCHS += x86
+endif
+ART_HOST_CODEGEN_ARCHS := $(sort $(ART_HOST_CODEGEN_ARCHS))
+
+# Base set of cflags used by target build only
+art_target_cflags := \
+  $(foreach target_arch,$(strip $(ART_TARGET_CODEGEN_ARCHS)), -DART_ENABLE_CODEGEN_$(target_arch))
+# Base set of cflags used by host build only
+art_host_cflags := \
+  $(foreach host_arch,$(strip $(ART_HOST_CODEGEN_ARCHS)), -DART_ENABLE_CODEGEN_$(host_arch))
+
 # Base set of asflags used by all things ART.
 art_asflags :=
 
@@ -292,7 +345,7 @@
   $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
 endif
 ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
-ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
+ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
 ART_HOST_ASFLAGS += $(art_asflags)
 
 # Disable -Wpessimizing-move: triggered for art/runtime/base/variant_map.h:261
@@ -315,6 +368,7 @@
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
 ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+ART_TARGET_CFLAGS += $(art_target_cflags)
 ART_TARGET_ASFLAGS += $(art_asflags)
 
 ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags)
@@ -346,6 +400,8 @@
 # Clear locals now they've served their purpose.
 art_cflags :=
 art_asflags :=
+art_host_cflags :=
+art_target_cflags :=
 art_debug_cflags :=
 art_non_debug_cflags :=
 art_host_non_debug_cflags :=
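
The -DART_ENABLE_CODEGEN_$(arch) defines assembled above are what every source change below keys
off. A minimal standalone sketch, assuming nothing beyond the defines this makefile emits, of how a
translation unit compiles a backend in or out:

#include <iostream>

// Each enabled architecture contributes exactly one ART_ENABLE_CODEGEN_<arch>
// define, so the code for a disabled backend drops out at preprocessing time.
void PrintEnabledBackends() {
#ifdef ART_ENABLE_CODEGEN_arm
  std::cout << "arm codegen compiled in\n";
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
  std::cout << "x86_64 codegen compiled in\n";
#endif
}

Setting ART_TARGET_CODEGEN_ARCHS or ART_HOST_CODEGEN_ARCHS to a subset simply shrinks the list of
defines; the #ifdef guards in the compiler sources below rely on nothing else.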
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 63ad9cf..566d289 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -19,6 +19,7 @@
 
 include art/build/Android.common_test.mk
 include art/build/Android.common_path.mk
+include art/build/Android.common_build.mk
 
 # Subdirectories in art/test which contain dex files used as inputs for gtests.
 GTEST_DEX_DIRECTORIES := \
@@ -233,38 +234,24 @@
   compiler/dex/local_value_numbering_test.cc \
   compiler/dex/mir_graph_test.cc \
   compiler/dex/mir_optimization_test.cc \
-  compiler/dex/quick/quick_cfi_test.cc \
   compiler/dex/type_inference_test.cc \
   compiler/dwarf/dwarf_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
-  compiler/jni/jni_cfi_test.cc \
   compiler/jni/jni_compiler_test.cc \
-  compiler/linker/arm64/relative_patcher_arm64_test.cc \
-  compiler/linker/arm/relative_patcher_thumb2_test.cc \
-  compiler/linker/x86/relative_patcher_x86_test.cc \
-  compiler/linker/x86_64/relative_patcher_x86_64_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
-  compiler/optimizing/codegen_test.cc \
-  compiler/optimizing/dead_code_elimination_test.cc \
-  compiler/optimizing/constant_folding_test.cc \
   compiler/optimizing/dominator_test.cc \
   compiler/optimizing/find_loops_test.cc \
   compiler/optimizing/graph_checker_test.cc \
   compiler/optimizing/graph_test.cc \
   compiler/optimizing/gvn_test.cc \
   compiler/optimizing/licm_test.cc \
-  compiler/optimizing/linearize_test.cc \
-  compiler/optimizing/liveness_test.cc \
   compiler/optimizing/live_interval_test.cc \
-  compiler/optimizing/live_ranges_test.cc \
   compiler/optimizing/nodes_test.cc \
-  compiler/optimizing/optimizing_cfi_test.cc \
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
-  compiler/optimizing/register_allocator_test.cc \
   compiler/optimizing/side_effects_test.cc \
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
@@ -274,10 +261,38 @@
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
+
+COMPILER_GTEST_COMMON_SRC_FILES_all := \
+  compiler/dex/quick/quick_cfi_test.cc \
+  compiler/jni/jni_cfi_test.cc \
+  compiler/optimizing/codegen_test.cc \
+  compiler/optimizing/constant_folding_test.cc \
+  compiler/optimizing/dead_code_elimination_test.cc \
+  compiler/optimizing/linearize_test.cc \
+  compiler/optimizing/liveness_test.cc \
+  compiler/optimizing/live_ranges_test.cc \
+  compiler/optimizing/optimizing_cfi_test.cc \
+  compiler/optimizing/register_allocator_test.cc \
+
+COMPILER_GTEST_COMMON_SRC_FILES_arm := \
+  compiler/linker/arm/relative_patcher_thumb2_test.cc \
   compiler/utils/arm/managed_register_arm_test.cc \
+
+COMPILER_GTEST_COMMON_SRC_FILES_arm64 := \
+  compiler/linker/arm64/relative_patcher_arm64_test.cc \
   compiler/utils/arm64/managed_register_arm64_test.cc \
+
+COMPILER_GTEST_COMMON_SRC_FILES_mips := \
+
+COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \
+
+COMPILER_GTEST_COMMON_SRC_FILES_x86 := \
+  compiler/linker/x86/relative_patcher_x86_test.cc \
   compiler/utils/x86/managed_register_x86_test.cc \
 
+COMPILER_GTEST_COMMON_SRC_FILES_x86_64 := \
+  compiler/linker/x86_64/relative_patcher_x86_64_test.cc \
+
 RUNTIME_GTEST_TARGET_SRC_FILES := \
   $(RUNTIME_GTEST_COMMON_SRC_FILES)
 
@@ -287,15 +302,67 @@
 COMPILER_GTEST_TARGET_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES)
 
+COMPILER_GTEST_TARGET_SRC_FILES_all := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_all) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_arm := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_arm) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_arm64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_arm64) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_mips := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_mips64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_x86 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_x86) \
+
+COMPILER_GTEST_TARGET_SRC_FILES_x86_64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \
+
+$(foreach arch,$(ART_TARGET_CODEGEN_ARCHS),$(eval COMPILER_GTEST_TARGET_SRC_FILES += $$(COMPILER_GTEST_TARGET_SRC_FILES_$(arch))))
+ifeq (true,$(ART_TARGET_COMPILER_TESTS))
+  COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
+endif
+
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
-  compiler/dex/quick/x86/quick_assemble_x86_test.cc \
+
+COMPILER_GTEST_HOST_SRC_FILES_all := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_all) \
+
+COMPILER_GTEST_HOST_SRC_FILES_arm := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_arm) \
   compiler/utils/arm/assembler_arm32_test.cc \
   compiler/utils/arm/assembler_thumb2_test.cc \
   compiler/utils/assembler_thumb_test.cc \
+
+COMPILER_GTEST_HOST_SRC_FILES_arm64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_arm64) \
+
+COMPILER_GTEST_HOST_SRC_FILES_mips := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
+
+COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
+
+COMPILER_GTEST_HOST_SRC_FILES_x86 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_x86) \
+  compiler/dex/quick/x86/quick_assemble_x86_test.cc \
   compiler/utils/x86/assembler_x86_test.cc \
+
+COMPILER_GTEST_HOST_SRC_FILES_x86_64 := \
+  $(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
+$(foreach arch,$(ART_HOST_CODEGEN_ARCHS),$(eval COMPILER_GTEST_HOST_SRC_FILES += $$(COMPILER_GTEST_HOST_SRC_FILES_$(arch))))
+ifeq (true,$(ART_HOST_COMPILER_TESTS))
+  COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
+endif
+
 ART_TEST_CFLAGS :=
 
 include $(CLEAR_VARS)
@@ -558,6 +625,7 @@
   2nd_library_path :=
 endef  # define-art-gtest
 
+
 ifeq ($(ART_BUILD_TARGET),true)
   $(foreach file,$(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),,libbacktrace)))
   $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),art/compiler,libartd-compiler libbacktrace)))
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4944915..7d368a2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -24,18 +24,6 @@
 	dex/gvn_dead_code_elimination.cc \
 	dex/local_value_numbering.cc \
 	dex/type_inference.cc \
-	dex/quick/arm/assemble_arm.cc \
-	dex/quick/arm/call_arm.cc \
-	dex/quick/arm/fp_arm.cc \
-	dex/quick/arm/int_arm.cc \
-	dex/quick/arm/target_arm.cc \
-	dex/quick/arm/utility_arm.cc \
-	dex/quick/arm64/assemble_arm64.cc \
-	dex/quick/arm64/call_arm64.cc \
-	dex/quick/arm64/fp_arm64.cc \
-	dex/quick/arm64/int_arm64.cc \
-	dex/quick/arm64/target_arm64.cc \
-	dex/quick/arm64/utility_arm64.cc \
 	dex/quick/codegen_util.cc \
 	dex/quick/dex_file_method_inliner.cc \
 	dex/quick/dex_file_to_method_inliner_map.cc \
@@ -44,22 +32,10 @@
 	dex/quick/gen_loadstore.cc \
 	dex/quick/lazy_debug_frame_opcode_writer.cc \
 	dex/quick/local_optimizations.cc \
-	dex/quick/mips/assemble_mips.cc \
-	dex/quick/mips/call_mips.cc \
-	dex/quick/mips/fp_mips.cc \
-	dex/quick/mips/int_mips.cc \
-	dex/quick/mips/target_mips.cc \
-	dex/quick/mips/utility_mips.cc \
 	dex/quick/mir_to_lir.cc \
 	dex/quick/quick_compiler.cc \
 	dex/quick/ralloc_util.cc \
 	dex/quick/resource_mask.cc \
-	dex/quick/x86/assemble_x86.cc \
-	dex/quick/x86/call_x86.cc \
-	dex/quick/x86/fp_x86.cc \
-	dex/quick/x86/int_x86.cc \
-	dex/quick/x86/target_x86.cc \
-	dex/quick/x86/utility_x86.cc \
 	dex/dex_to_dex_compiler.cc \
 	dex/bb_optimizations.cc \
 	dex/compiler_ir.cc \
@@ -82,30 +58,13 @@
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
 	linker/relative_patcher.cc \
-	linker/arm/relative_patcher_arm_base.cc \
-	linker/arm/relative_patcher_thumb2.cc \
-	linker/arm64/relative_patcher_arm64.cc \
-	linker/x86/relative_patcher_x86_base.cc \
-	linker/x86/relative_patcher_x86.cc \
-	linker/x86_64/relative_patcher_x86_64.cc \
 	jit/jit_compiler.cc \
-	jni/quick/arm/calling_convention_arm.cc \
-	jni/quick/arm64/calling_convention_arm64.cc \
-	jni/quick/mips/calling_convention_mips.cc \
-	jni/quick/mips64/calling_convention_mips64.cc \
-	jni/quick/x86/calling_convention_x86.cc \
-	jni/quick/x86_64/calling_convention_x86_64.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
 	optimizing/boolean_simplifier.cc \
 	optimizing/builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/code_generator.cc \
-	optimizing/code_generator_arm.cc \
-	optimizing/code_generator_arm64.cc \
-	optimizing/code_generator_mips64.cc \
-	optimizing/code_generator_x86.cc \
-	optimizing/code_generator_x86_64.cc \
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
@@ -115,10 +74,6 @@
 	optimizing/inliner.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
-	optimizing/intrinsics_arm.cc \
-	optimizing/intrinsics_arm64.cc \
-	optimizing/intrinsics_x86.cc \
-	optimizing/intrinsics_x86_64.cc \
 	optimizing/licm.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
@@ -136,21 +91,7 @@
 	optimizing/stack_map_stream.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_bit_vector.cc \
-	utils/arm/assembler_arm.cc \
-	utils/arm/assembler_arm32.cc \
-	utils/arm/assembler_thumb2.cc \
-	utils/arm/managed_register_arm.cc \
-	utils/arm64/assembler_arm64.cc \
-	utils/arm64/managed_register_arm64.cc \
 	utils/assembler.cc \
-	utils/mips/assembler_mips.cc \
-	utils/mips/managed_register_mips.cc \
-	utils/mips64/assembler_mips64.cc \
-	utils/mips64/managed_register_mips64.cc \
-	utils/x86/assembler_x86.cc \
-	utils/x86/managed_register_x86.cc \
-	utils/x86_64/assembler_x86_64.cc \
-	utils/x86_64/managed_register_x86_64.cc \
 	utils/swap_space.cc \
 	buffered_output_stream.cc \
 	compiler.cc \
@@ -163,12 +104,89 @@
 	output_stream.cc \
 	vector_output_stream.cc
 
+LIBART_COMPILER_SRC_FILES_arm := \
+	dex/quick/arm/assemble_arm.cc \
+	dex/quick/arm/call_arm.cc \
+	dex/quick/arm/fp_arm.cc \
+	dex/quick/arm/int_arm.cc \
+	dex/quick/arm/target_arm.cc \
+	dex/quick/arm/utility_arm.cc \
+	jni/quick/arm/calling_convention_arm.cc \
+	linker/arm/relative_patcher_arm_base.cc \
+	linker/arm/relative_patcher_thumb2.cc \
+	optimizing/code_generator_arm.cc \
+	optimizing/intrinsics_arm.cc \
+	utils/arm/assembler_arm.cc \
+	utils/arm/assembler_arm32.cc \
+	utils/arm/assembler_thumb2.cc \
+	utils/arm/managed_register_arm.cc \
+
+# TODO: We should really separate out those files that are actually needed for both variants of an
+# architecture into their own category. Currently we just include all of the 32-bit variant in the
+# 64-bit variant. It also might be good to allow one to compile only the 64-bit variant without the
+# 32-bit one.
+LIBART_COMPILER_SRC_FILES_arm64 := \
+	$(LIBART_COMPILER_SRC_FILES_arm) \
+	dex/quick/arm64/assemble_arm64.cc \
+	dex/quick/arm64/call_arm64.cc \
+	dex/quick/arm64/fp_arm64.cc \
+	dex/quick/arm64/int_arm64.cc \
+	dex/quick/arm64/target_arm64.cc \
+	dex/quick/arm64/utility_arm64.cc \
+	jni/quick/arm64/calling_convention_arm64.cc \
+	linker/arm64/relative_patcher_arm64.cc \
+	optimizing/code_generator_arm64.cc \
+	optimizing/intrinsics_arm64.cc \
+	utils/arm64/assembler_arm64.cc \
+	utils/arm64/managed_register_arm64.cc \
+
+LIBART_COMPILER_SRC_FILES_mips := \
+	dex/quick/mips/assemble_mips.cc \
+	dex/quick/mips/call_mips.cc \
+	dex/quick/mips/fp_mips.cc \
+	dex/quick/mips/int_mips.cc \
+	dex/quick/mips/target_mips.cc \
+	dex/quick/mips/utility_mips.cc \
+	jni/quick/mips/calling_convention_mips.cc \
+	utils/mips/assembler_mips.cc \
+	utils/mips/managed_register_mips.cc \
+
+LIBART_COMPILER_SRC_FILES_mips64 := \
+	$(LIBART_COMPILER_SRC_FILES_mips) \
+	jni/quick/mips64/calling_convention_mips64.cc \
+	optimizing/code_generator_mips64.cc \
+	utils/mips64/assembler_mips64.cc \
+	utils/mips64/managed_register_mips64.cc \
+
+
+LIBART_COMPILER_SRC_FILES_x86 := \
+	dex/quick/x86/assemble_x86.cc \
+	dex/quick/x86/call_x86.cc \
+	dex/quick/x86/fp_x86.cc \
+	dex/quick/x86/int_x86.cc \
+	dex/quick/x86/target_x86.cc \
+	dex/quick/x86/utility_x86.cc \
+	jni/quick/x86/calling_convention_x86.cc \
+	linker/x86/relative_patcher_x86.cc \
+	linker/x86/relative_patcher_x86_base.cc \
+	optimizing/code_generator_x86.cc \
+	optimizing/intrinsics_x86.cc \
+	utils/x86/assembler_x86.cc \
+	utils/x86/managed_register_x86.cc \
+
+LIBART_COMPILER_SRC_FILES_x86_64 := \
+	$(LIBART_COMPILER_SRC_FILES_x86) \
+	jni/quick/x86_64/calling_convention_x86_64.cc \
+	linker/x86_64/relative_patcher_x86_64.cc \
+	optimizing/intrinsics_x86_64.cc \
+	optimizing/code_generator_x86_64.cc \
+	utils/x86_64/assembler_x86_64.cc \
+	utils/x86_64/managed_register_x86_64.cc \
+
+
 LIBART_COMPILER_CFLAGS :=
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
-  dex/quick/arm/arm_lir.h \
-  dex/quick/arm64/arm64_lir.h \
-  dex/quick/mips/mips_lir.h \
   dex/quick/resource_mask.h \
   dex/compiler_enums.h \
   dex/dex_to_dex_compiler.h \
@@ -177,9 +195,26 @@
   driver/compiler_driver.h \
   driver/compiler_options.h \
   image_writer.h \
-  optimizing/locations.h \
+  optimizing/locations.h
+
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm := \
+  dex/quick/arm/arm_lir.h \
   utils/arm/constants_arm.h
 
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm64 := \
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm) \
+  dex/quick/arm64/arm64_lir.h
+
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \
+  dex/quick/mips/mips_lir.h
+
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
+
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 :=
+LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86)
+
 # $(1): target or host
 # $(2): ndebug or debug
 # $(3): static or shared (empty means shared, applies only for host)
@@ -202,6 +237,9 @@
   include $(CLEAR_VARS)
   ifeq ($$(art_target_or_host),host)
     LOCAL_IS_HOST_MODULE := true
+    art_codegen_targets := $(ART_HOST_CODEGEN_ARCHS)
+  else
+    art_codegen_targets := $(ART_TARGET_CODEGEN_ARCHS)
   endif
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
@@ -230,10 +268,14 @@
     LOCAL_MODULE_CLASS := SHARED_LIBRARIES
   endif
 
-  LOCAL_SRC_FILES := $$(LIBART_COMPILER_SRC_FILES)
+  # Sort removes duplicates.
+  LOCAL_SRC_FILES := $$(LIBART_COMPILER_SRC_FILES) \
+    $$(sort $$(foreach arch,$$(art_codegen_targets), $$(LIBART_COMPILER_SRC_FILES_$$(arch))))
 
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
-  ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES))
+  ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,\
+                                $$(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES) \
+                                $$(sort $$(foreach arch,$$(art_codegen_targets), $$(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_$$(arch)))))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
 
 $$(ENUM_OPERATOR_OUT_GEN): art/tools/generate-operator-out.py
@@ -326,6 +368,7 @@
   art_target_or_host :=
   art_ndebug_or_debug :=
   art_static_or_shared :=
+  art_codegen_targets :=
 endef
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 39496a4..6e73ae7 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -43,10 +43,21 @@
 #include "runtime.h"
 
 // Specific compiler backends.
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "dex/quick/arm/backend_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "dex/quick/arm64/backend_arm64.h"
+#endif
+
+#if defined(ART_ENABLE_CODEGEN_mips) || defined(ART_ENABLE_CODEGEN_mips64)
 #include "dex/quick/mips/backend_mips.h"
+#endif
+
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
 #include "dex/quick/x86/backend_x86.h"
+#endif
 
 namespace art {
 
@@ -844,22 +855,42 @@
   UNUSED(compilation_unit);
   Mir2Lir* mir_to_lir = nullptr;
   switch (cu->instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kThumb2:
       mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+#endif  // ART_ENABLE_CODEGEN_arm
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+#endif  // ART_ENABLE_CODEGEN_arm64
+#if defined(ART_ENABLE_CODEGEN_mips) || defined(ART_ENABLE_CODEGEN_mips64)
+      // Intentional two-level #ifdef: kMips64 must fall through to the fatal default when mips64
+      // codegen is disabled, even if mips is enabled, and vice versa.
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       // Fall-through.
+#endif  // ART_ENABLE_CODEGEN_mips
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
+#endif  // ART_ENABLE_CODEGEN_mips64
       mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+#endif  // ART_ENABLE_CODEGEN_mips || ART_ENABLE_CODEGEN_mips64
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+      // Intentional two-level #ifdef: kX86_64 must fall through to the fatal default when x86_64
+      // codegen is disabled, even if x86 is enabled, and vice versa.
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       // Fall-through.
+#endif  // ART_ENABLE_CODEGEN_x86
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
+#endif  // ART_ENABLE_CODEGEN_x86_64
       mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+#endif  // ART_ENABLE_CODEGEN_x86 || ART_ENABLE_CODEGEN_x86_64
     default:
       LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
   }
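
A condensed, self-contained sketch of the two-level #ifdef dispatch above (the enum and the return
values are simplified stand-ins, not ART's types): with only mips codegen enabled, the kMips64
label is compiled out, so a kMips64 request lands on the fatal default, which is exactly the
failure the comment asks for.

#include <cstdio>
#include <cstdlib>

enum InstructionSet { kMips, kMips64 };

const char* SelectBackend(InstructionSet isa) {
  switch (isa) {
#if defined(ART_ENABLE_CODEGEN_mips) || defined(ART_ENABLE_CODEGEN_mips64)
#ifdef ART_ENABLE_CODEGEN_mips
    case kMips:
      // Fall-through.
#endif
#ifdef ART_ENABLE_CODEGEN_mips64
    case kMips64:
#endif
      return "mips backend";
#endif
    default:
      std::fprintf(stderr, "Unexpected instruction set: %d\n", isa);
      std::abort();  // Stands in for LOG(FATAL).
  }
}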
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fa4667e..fa25a17 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1109,7 +1109,7 @@
     // If it is not a DexCache, visit all references.
     mirror::Class* klass = object->GetClass();
     if (klass != dex_cache_class_) {
-      object->VisitReferences<false /* visit class */>(*this, *this);
+      object->VisitReferences(*this, *this);
     }
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3a3410c..93897aa 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1350,7 +1350,7 @@
     }
   }
   FixupClassVisitor visitor(this, copy);
-  static_cast<mirror::Object*>(orig)->VisitReferences<true /*visit class*/>(visitor, visitor);
+  static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 }
 
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1397,7 +1397,7 @@
       down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
     }
     FixupVisitor visitor(this, copy);
-    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+    orig->VisitReferences(visitor, visitor);
   }
 }
 
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index bb8136b..cef8c5d 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -17,12 +17,30 @@
 #include "calling_convention.h"
 
 #include "base/logging.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "jni/quick/arm/calling_convention_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "jni/quick/arm64/calling_convention_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
 #include "jni/quick/mips/calling_convention_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
 #include "jni/quick/mips64/calling_convention_mips64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
 #include "jni/quick/x86/calling_convention_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
 #include "jni/quick/x86_64/calling_convention_x86_64.h"
+#endif
 
 namespace art {
 
@@ -31,19 +49,31 @@
 ManagedRuntimeCallingConvention* ManagedRuntimeCallingConvention::Create(
     bool is_static, bool is_synchronized, const char* shorty, InstructionSet instruction_set) {
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
       return new arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return new arm64::Arm64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return new mips64::Mips64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       return new x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return new x86_64::X86_64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+#endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return nullptr;
@@ -106,19 +136,31 @@
                                                    const char* shorty,
                                                    InstructionSet instruction_set) {
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
       return new arm::ArmJniCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return new arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return new mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       return new x86::X86JniCallingConvention(is_static, is_synchronized, shorty);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return new x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty);
+#endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return nullptr;
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 89aed95..82702dc 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -16,10 +16,18 @@
 
 #include "linker/relative_patcher.h"
 
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "linker/arm/relative_patcher_thumb2.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "linker/arm64/relative_patcher_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 #include "linker/x86/relative_patcher_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 #include "linker/x86_64/relative_patcher_x86_64.h"
+#endif
 #include "output_stream.h"
 
 namespace art {
@@ -64,18 +72,28 @@
     DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone);
   };
 
+  UNUSED(features);
+  UNUSED(provider);
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       return std::unique_ptr<RelativePatcher>(new X86RelativePatcher());
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher());
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
       // Fall through: we generate Thumb2 code for "arm".
     case kThumb2:
       return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider));
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return std::unique_ptr<RelativePatcher>(
           new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
+#endif
     default:
       return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
   }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8fe3170..6568ea4 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -16,11 +16,26 @@
 
 #include "code_generator.h"
 
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
 #include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
 #include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
 #include "code_generator_mips64.h"
+#endif
+
 #include "compiled_method.h"
 #include "dex/verified_method.h"
 #include "driver/dex_compilation_unit.h"
@@ -31,6 +46,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 #include "verifier/dex_gc_map.h"
@@ -520,34 +536,49 @@
                                      const InstructionSetFeatures& isa_features,
                                      const CompilerOptions& compiler_options) {
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2: {
       return new arm::CodeGeneratorARM(graph,
           *isa_features.AsArmInstructionSetFeatures(),
           compiler_options);
     }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       return new arm64::CodeGeneratorARM64(graph,
           *isa_features.AsArm64InstructionSetFeatures(),
           compiler_options);
     }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
+      UNUSED(compiler_options);
+      UNUSED(graph);
+      UNUSED(isa_features);
       return nullptr;
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64: {
       return new mips64::CodeGeneratorMIPS64(graph,
           *isa_features.AsMips64InstructionSetFeatures(),
           compiler_options);
     }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
       return new x86::CodeGeneratorX86(graph,
            *isa_features.AsX86InstructionSetFeatures(),
            compiler_options);
     }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64: {
       return new x86_64::CodeGeneratorX86_64(graph,
           *isa_features.AsX86_64InstructionSetFeatures(),
           compiler_options);
     }
+#endif
     default:
       return nullptr;
   }
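
The UNUSED(...) calls added to the kMips case exist because, in a build where only mips codegen is
enabled, every case that actually reads graph, isa_features, or compiler_options is preprocessed
away, and the parameters would then trip unused-parameter warnings under -Werror. A tiny sketch of
the pattern, with a (void)-cast stand-in for ART's UNUSED() helper from base/macros.h:

#define UNUSED(x) ((void)(x))  // Hypothetical stand-in for ART's helper.

const void* CreateBackend(int isa, const void* options) {
  switch (isa) {
#ifdef ART_ENABLE_CODEGEN_mips
    case 0:
      UNUSED(options);  // No optimizing backend for mips; mark the parameter used anyway.
      return nullptr;
#endif
#ifdef ART_ENABLE_CODEGEN_x86
    case 1:
      return options;  // Other backends genuinely consume the arguments.
#endif
    default:
      return nullptr;
  }
}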
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 96ef863..5049989 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -282,12 +282,10 @@
  public:
   TypeCheckSlowPathX86(HInstruction* instruction,
                        Location class_to_check,
-                       Location object_class,
-                       uint32_t dex_pc)
+                       Location object_class)
       : instruction_(instruction),
         class_to_check_(class_to_check),
-        object_class_(object_class),
-        dex_pc_(dex_pc) {}
+        object_class_(object_class) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -322,7 +320,6 @@
                                  this);
     }
 
-    RecordPcInfo(codegen, instruction_, dex_pc_);
     if (instruction_->IsInstanceOf()) {
       x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
     }
@@ -337,7 +334,6 @@
   HInstruction* const instruction_;
   const Location class_to_check_;
   const Location object_class_;
-  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
 };
@@ -348,6 +344,7 @@
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    DCHECK(instruction_->IsDeoptimize());
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -355,11 +352,6 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
-    // No need to restore live registers.
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    codegen->RecordPcInfo(instruction_, dex_pc, this);
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -2832,11 +2824,6 @@
                                 instruction->GetDexPc(),
                                 nullptr);
       }
-      uint32_t dex_pc = is_div
-          ? instruction->AsDiv()->GetDexPc()
-          : instruction->AsRem()->GetDexPc();
-      codegen_->RecordPcInfo(instruction, dex_pc);
-
       break;
     }
 
@@ -4878,7 +4865,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
+        instruction, locations->InAt(1), locations->Out());
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -4911,7 +4898,7 @@
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+      instruction, locations->InAt(1), locations->GetTemp(0));
   codegen_->AddSlowPath(slow_path);
 
   // Avoid null check if we know obj is not null.
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index e4bc9e6..5406a0c 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -89,7 +89,7 @@
                           block->GetBlockId()));
   }
 
-  // Ensure that the only Return(Void) and Throw jump to Exit. An exiting
+  // Ensure that only Return(Void) and Throw jump to Exit. An exiting
   // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
   if (block->IsExitBlock()) {
     for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
@@ -355,39 +355,6 @@
 void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
   super_type::VisitBasicBlock(block);
 
-  // Ensure that only catch blocks have exceptional predecessors, and if they do
-  // these are instructions which throw into them.
-  if (block->IsCatchBlock()) {
-    for (size_t i = 0, e = block->GetExceptionalPredecessors().Size(); i < e; ++i) {
-      HInstruction* thrower = block->GetExceptionalPredecessors().Get(i);
-      HBasicBlock* try_block = thrower->GetBlock();
-      if (!thrower->CanThrow()) {
-        AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d does not throw.",
-                              thrower->DebugName(),
-                              thrower->GetId(),
-                              block->GetBlockId()));
-      } else if (!try_block->IsInTry()) {
-        AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d "
-                              "is not in a try block.",
-                              thrower->DebugName(),
-                              thrower->GetId(),
-                              block->GetBlockId()));
-      } else if (!try_block->GetTryEntry()->HasExceptionHandler(*block)) {
-        AddError(StringPrintf("Catch block %d is not an exception handler of "
-                              "its exceptional predecessor %s:%d.",
-                              block->GetBlockId(),
-                              thrower->DebugName(),
-                              thrower->GetId()));
-      }
-    }
-  } else {
-    if (!block->GetExceptionalPredecessors().IsEmpty()) {
-      AddError(StringPrintf("Normal block %d has %zu exceptional predecessors.",
-                            block->GetBlockId(),
-                            block->GetExceptionalPredecessors().Size()));
-    }
-  }
-
   // Ensure that catch blocks are not normal successors, and normal blocks are
   // never exceptional successors.
   const size_t num_normal_successors = block->NumberOfNormalSuccessors();
@@ -572,7 +539,6 @@
 
 void SSAChecker::VisitInstruction(HInstruction* instruction) {
   super_type::VisitInstruction(instruction);
-  HBasicBlock* block = instruction->GetBlock();
 
   // Ensure an instruction dominates all its uses.
   for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
@@ -604,24 +570,6 @@
       }
     }
   }
-
-  // Ensure that throwing instructions in try blocks are listed as exceptional
-  // predecessors in their exception handlers.
-  if (instruction->CanThrow() && block->IsInTry()) {
-    for (HExceptionHandlerIterator handler_it(*block->GetTryEntry());
-         !handler_it.Done();
-         handler_it.Advance()) {
-      if (!handler_it.Current()->GetExceptionalPredecessors().Contains(instruction)) {
-        AddError(StringPrintf("Instruction %s:%d is in try block %d and can throw "
-                              "but its exception handler %d does not list it in "
-                              "its exceptional predecessors.",
-                              instruction->DebugName(),
-                              instruction->GetId(),
-                              block->GetBlockId(),
-                              handler_it.Current()->GetBlockId()));
-      }
-    }
-  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
@@ -669,27 +617,6 @@
   if (phi->IsCatchPhi()) {
     // The number of inputs of a catch phi corresponds to the total number of
     // throwing instructions caught by this catch block.
-    const GrowableArray<HInstruction*>& predecessors =
-        phi->GetBlock()->GetExceptionalPredecessors();
-    if (phi->InputCount() != predecessors.Size()) {
-      AddError(StringPrintf(
-          "Phi %d in catch block %d has %zu inputs, "
-          "but catch block %d has %zu exceptional predecessors.",
-          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
-          phi->GetBlock()->GetBlockId(), predecessors.Size()));
-    } else {
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
-        HInstruction* thrower = predecessors.Get(i);
-        if (!input->StrictlyDominates(thrower)) {
-          AddError(StringPrintf(
-              "Input %d at index %zu of phi %d from catch block %d does not "
-              "dominate the throwing instruction %s:%d.",
-              input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
-              thrower->DebugName(), thrower->GetId()));
-        }
-      }
-    }
   } else {
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4471d71..b7126b2 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -20,6 +20,7 @@
 
 #include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
 #include "code_generator_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
@@ -1835,6 +1836,115 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      value = is_long ? 64 : 32;
+    } else {
+      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
+    }
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ bsrl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ bsrl(out, Address(ESP, src.GetStackIndex()));
+    }
+
+    // BSR sets ZF if the input was zero; in that case the destination register is undefined.
+    Label all_zeroes, done;
+    __ j(kEqual, &all_zeroes);
+
+    // Correct the result from BSR to get the final CLZ result.
+    __ xorl(out, Immediate(31));
+    __ jmp(&done);
+
+    // Fix the zero case with the expected result.
+    __ Bind(&all_zeroes);
+    __ movl(out, Immediate(32));
+
+    __ Bind(&done);
+    return;
+  }
+
+  // The 64-bit case has to examine both halves of the register pair.
+  DCHECK(src.IsRegisterPair());
+  Register src_lo = src.AsRegisterPairLow<Register>();
+  Register src_hi = src.AsRegisterPairHigh<Register>();
+  Label handle_low, done, all_zeroes;
+
+  // Is the high word zero?
+  __ testl(src_hi, src_hi);
+  __ j(kEqual, &handle_low);
+
+  // High word is not zero. We know that the BSR result is defined in this case.
+  __ bsrl(out, src_hi);
+
+  // Correct the result from BSR to get the final CLZ result.
+  __ xorl(out, Immediate(31));
+  __ jmp(&done);
+
+  // High word was zero.  We have to compute the low word count and add 32.
+  __ Bind(&handle_low);
+  __ bsrl(out, src_lo);
+  __ j(kEqual, &all_zeroes);
+
+  // We had a valid result.  Use an XOR to both correct the result and add 32.
+  __ xorl(out, Immediate(63));
+  __ jmp(&done);
+
+  // All zero case.
+  __ Bind(&all_zeroes);
+  __ movl(out, Immediate(64));
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1847,8 +1957,6 @@
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
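The two XOR corrections above are worth a worked check. For non-zero x, BSR returns the index of
the most significant set bit, so clz32(x) == 31 - bsr(x); since bsr(x) lies in [0, 31], that
subtraction is the same as XOR with 31. In the long path, once the high word is known to be zero,
clz64 == 32 + (31 - bsr(lo)), and for b in [0, 31], b ^ 63 == 32 + (31 - b), so a single XOR with
63 both corrects the result and adds 32. A small self-contained sketch (Bsr32 is a software model
of the instruction, not ART code):

#include <cassert>
#include <cstdint>

// Software model of BSR: index of the highest set bit; input must be non-zero.
int Bsr32(uint32_t x) {
  int i = 31;
  while (((x >> i) & 1) == 0) --i;
  return i;
}

int main() {
  for (uint32_t x : {1u, 2u, 0x80000000u, 0x12345678u}) {
    int b = Bsr32(x);
    assert((b ^ 31) == 31 - b);         // 32-bit correction.
    assert((b ^ 63) == 32 + (31 - b));  // Low-word correction in the 64-bit path.
  }
  return 0;
}
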
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9ea68ec..15fbac1 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -20,6 +20,7 @@
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "art_method-inl.h"
+#include "base/bit_utils.h"
 #include "code_generator_x86_64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
@@ -1685,6 +1686,84 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  int zero_value_result = is_long ? 64 : 32;
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      value = zero_value_result;
+    } else {
+      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
+    }
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ bsrq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ bsrl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+
+  // BSR sets ZF if the input was zero; in that case the destination register is undefined.
+  Label is_zero, done;
+  __ j(kEqual, &is_zero);
+
+  // Correct the result from BSR to get the CLZ result.
+  __ xorl(out, Immediate(zero_value_result - 1));
+  __ jmp(&done);
+
+  // Fix the zero case with the expected result.
+  __ Bind(&is_zero);
+  __ movl(out, Immediate(zero_value_result));
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1696,8 +1775,6 @@
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
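Both the x86 and x86_64 versions share the constant-input fast path: when the argument is an
HConstant, the leading-zero count is folded at code-generation time, with a zero input mapped to
the full bit width (hardware BSR leaves the destination undefined for zero), and the emitted code
is a single xorl or movl. A minimal sketch of that fold in plain C++, where CountLeadingZeros
stands in for ART's CLZ() helper from base/bit_utils.h:

#include <cstdint>

// Portable stand-in for CLZ(): counts leading zero bits of a non-zero value.
template <typename T>
int CountLeadingZeros(T x) {
  int n = 0;
  for (int i = sizeof(T) * 8 - 1; i >= 0 && ((x >> i) & 1) == 0; --i) ++n;
  return n;
}

int FoldLeadingZeros(int64_t value, bool is_long) {
  if (value == 0) {
    return is_long ? 64 : 32;  // Zero input: result is the full bit width.
  }
  return is_long ? CountLeadingZeros(static_cast<uint64_t>(value))
                 : CountLeadingZeros(static_cast<uint32_t>(value));
}
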
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b6a1980..f2b63ae 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -564,13 +564,6 @@
   return false;
 }
 
-void HBasicBlock::AddExceptionalPredecessor(HInstruction* exceptional_predecessor) {
-  DCHECK(exceptional_predecessor->CanThrow());
-  DCHECK(exceptional_predecessor->GetBlock()->IsInTry());
-  DCHECK(exceptional_predecessor->GetBlock()->GetTryEntry()->HasExceptionHandler(*this));
-  exceptional_predecessors_.Add(exceptional_predecessor);
-}
-
 static void UpdateInputsUsers(HInstruction* instruction) {
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
     instruction->InputAt(i)->AddUseAt(instruction, i);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fd5b86e..2374c9c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -568,7 +568,6 @@
   explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
         predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors),
-        exceptional_predecessors_(graph->GetArena(), kDefaultNumberOfExceptionalPredecessors),
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
         loop_information_(nullptr),
         dominator_(nullptr),
@@ -583,10 +582,6 @@
     return predecessors_;
   }
 
-  const GrowableArray<HInstruction*>& GetExceptionalPredecessors() const {
-    return exceptional_predecessors_;
-  }
-
   const GrowableArray<HBasicBlock*>& GetSuccessors() const {
     return successors_;
   }
@@ -655,8 +650,6 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
-  void AddExceptionalPredecessor(HInstruction* exceptional_predecessor);
-
   void AddSuccessor(HBasicBlock* block) {
     successors_.Add(block);
     block->predecessors_.Add(this);
@@ -696,10 +689,6 @@
     predecessors_.Delete(block);
   }
 
-  void RemoveExceptionalPredecessor(HInstruction* instruction) {
-    exceptional_predecessors_.Delete(instruction);
-  }
-
   void RemoveSuccessor(HBasicBlock* block) {
     successors_.Delete(block);
   }
@@ -736,15 +725,6 @@
     return -1;
   }
 
-  size_t GetExceptionalPredecessorIndexOf(HInstruction* exceptional_predecessor) const {
-    for (size_t i = 0, e = exceptional_predecessors_.Size(); i < e; ++i) {
-      if (exceptional_predecessors_.Get(i) == exceptional_predecessor) {
-        return i;
-      }
-    }
-    return -1;
-  }
-
   size_t GetSuccessorIndexOf(HBasicBlock* successor) const {
     for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
       if (successors_.Get(i) == successor) {
@@ -905,7 +885,6 @@
  private:
   HGraph* graph_;
   GrowableArray<HBasicBlock*> predecessors_;
-  GrowableArray<HInstruction*> exceptional_predecessors_;
   GrowableArray<HBasicBlock*> successors_;
   HInstructionList instructions_;
   HInstructionList phis_;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 2c34e4d..ff2e6ad 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -570,9 +570,7 @@
   if (instruction->GetBlock()->IsInTry() && instruction->CanThrow()) {
     HTryBoundary* try_block = instruction->GetBlock()->GetTryEntry();
     for (HExceptionHandlerIterator it(*try_block); !it.Done(); it.Advance()) {
-      HBasicBlock* handler = it.Current();
-      handler->AddExceptionalPredecessor(instruction);
-      GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(handler);
+      GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(it.Current());
       for (size_t i = 0, e = current_locals_->Size(); i < e; ++i) {
         HInstruction* local_value = current_locals_->Get(i);
         if (local_value != nullptr) {
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index facc630..39e5259 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -17,17 +17,36 @@
 #include "trampoline_compiler.h"
 
 #include "jni_env_ext.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "utils/arm/assembler_thumb2.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "utils/arm64/assembler_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
 #include "utils/mips/assembler_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
 #include "utils/mips64/assembler_mips64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
 #include "utils/x86/assembler_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
 #include "utils/x86_64/assembler_x86_64.h"
+#endif
 
 #define __ assembler.
 
 namespace art {
 
+#ifdef ART_ENABLE_CODEGEN_arm
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
@@ -55,7 +74,9 @@
   return entry_stub.release();
 }
 }  // namespace arm
+#endif  // ART_ENABLE_CODEGEN_arm
 
+#ifdef ART_ENABLE_CODEGEN_arm64
 namespace arm64 {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<8> offset) {
@@ -92,7 +113,9 @@
   return entry_stub.release();
 }
 }  // namespace arm64
+#endif  // ART_ENABLE_CODEGEN_arm64
 
+#ifdef ART_ENABLE_CODEGEN_mips
 namespace mips {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
@@ -122,7 +145,9 @@
   return entry_stub.release();
 }
 }  // namespace mips
+#endif  // ART_ENABLE_CODEGEN_mips
 
+#ifdef ART_ENABLE_CODEGEN_mips64
 namespace mips64 {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<8> offset) {
@@ -152,7 +177,9 @@
   return entry_stub.release();
 }
 }  // namespace mips64
+#endif  // ART_ENABLE_CODEGEN_mips64
 
+#ifdef ART_ENABLE_CODEGEN_x86
 namespace x86 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
   X86Assembler assembler;
@@ -170,7 +197,9 @@
   return entry_stub.release();
 }
 }  // namespace x86
+#endif  // ART_ENABLE_CODEGEN_x86
 
+#ifdef ART_ENABLE_CODEGEN_x86_64
 namespace x86_64 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
   x86_64::X86_64Assembler assembler;
@@ -188,17 +217,26 @@
   return entry_stub.release();
 }
 }  // namespace x86_64
+#endif  // ART_ENABLE_CODEGEN_x86_64
 
 const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
                                                ThreadOffset<8> offset) {
   switch (isa) {
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return arm64::CreateTrampoline(abi, offset);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return mips64::CreateTrampoline(abi, offset);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return x86_64::CreateTrampoline(offset);
+#endif
     default:
+      UNUSED(abi);
+      UNUSED(offset);
       LOG(FATAL) << "Unexpected InstructionSet: " << isa;
       UNREACHABLE();
   }
@@ -207,13 +245,20 @@
 const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
                                                ThreadOffset<4> offset) {
   switch (isa) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
       return arm::CreateTrampoline(abi, offset);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return mips::CreateTrampoline(abi, offset);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
+      UNUSED(abi);
       return x86::CreateTrampoline(offset);
+#endif
     default:
       LOG(FATAL) << "Unexpected InstructionSet: " << isa;
       UNREACHABLE();
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 6d8a989..496ca95 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -19,13 +19,25 @@
 #include <algorithm>
 #include <vector>
 
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "arm/assembler_arm32.h"
 #include "arm/assembler_thumb2.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "arm64/assembler_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
 #include "mips/assembler_mips.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
 #include "mips64/assembler_mips64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 #include "x86/assembler_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 #include "x86_64/assembler_x86_64.h"
+#endif
 #include "globals.h"
 #include "memory_region.h"
 
@@ -112,20 +124,32 @@
 
 Assembler* Assembler::Create(InstructionSet instruction_set) {
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
       return new arm::Arm32Assembler();
     case kThumb2:
       return new arm::Thumb2Assembler();
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return new arm64::Arm64Assembler();
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return new mips::MipsAssembler();
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return new mips64::Mips64Assembler();
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       return new x86::X86Assembler();
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return new x86_64::X86_64Assembler();
+#endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return nullptr;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index d601035..a71197a 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -623,7 +623,7 @@
     }
   }
   PatchOat::PatchVisitor visitor(this, copy);
-  object->VisitReferences<true, kVerifyNone>(visitor, visitor);
+  object->VisitReferences<kVerifyNone>(visitor, visitor);
   if (object->IsClass<kVerifyNone>()) {
     auto* klass = object->AsClass();
     auto* copy_klass = down_cast<mirror::Class*>(copy);
diff --git a/runtime/elf.h b/runtime/elf.h
index 4514bb2..d1efc92 100644
--- a/runtime/elf.h
+++ b/runtime/elf.h
@@ -42,7 +42,7 @@
 typedef int64_t  Elf64_Sxword;
 
 // Object file magic string.
-static const char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
+static constexpr char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
 
 // e_ident size and indices.
 enum {
@@ -60,10 +60,10 @@
 };
 
 // BEGIN android-added for <elf.h> compat
-const char ELFMAG0 = ElfMagic[EI_MAG0];
-const char ELFMAG1 = ElfMagic[EI_MAG1];
-const char ELFMAG2 = ElfMagic[EI_MAG2];
-const char ELFMAG3 = ElfMagic[EI_MAG3];
+constexpr char ELFMAG0 = ElfMagic[EI_MAG0];
+constexpr char ELFMAG1 = ElfMagic[EI_MAG1];
+constexpr char ELFMAG2 = ElfMagic[EI_MAG2];
+constexpr char ELFMAG3 = ElfMagic[EI_MAG3];
 // END android-added for <elf.h> compat
 
 struct Elf32_Ehdr {
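
The const-to-constexpr switch is a strengthening rather than a behavior change: constexpr guarantees the magic bytes are compile-time constants, usable anywhere a constant expression is required. A small sketch of what that buys (the names are illustrative):

    static constexpr char kElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
    static constexpr char kMag0 = kElfMagic[0];  // element access stays a constant expression

    // constexpr values can feed contexts that demand compile-time constants:
    static_assert(kMag0 == 0x7f, "ELF files must begin with byte 0x7f");

    template <char C>
    struct ByteTag { static constexpr char value = C; };  // non-type template argument

    int main() { return ByteTag<kMag0>::value == 0x7f ? 0 : 1; }
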
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index dd9e2d1..5151819 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -153,7 +153,7 @@
     DCHECK(root != nullptr);
     ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, from_space_, immune_space_,
                                                       contains_reference_to_other_space_);
-    root->VisitReferences<kMovingClasses>(ref_visitor, VoidFunctor());
+    root->VisitReferences(ref_visitor, VoidFunctor());
   }
 
  private:
@@ -237,7 +237,7 @@
                                        visitor_,
                                        references_,
                                        has_target_reference_);
-    obj->VisitReferences<kMovingClasses>(visitor, VoidFunctor());
+    obj->VisitReferences(visitor, VoidFunctor());
   }
 
  private:
@@ -304,7 +304,7 @@
   void operator()(Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     CheckReferenceVisitor visitor(mod_union_table_, references_);
-    obj->VisitReferences<kMovingClasses>(visitor, VoidFunctor());
+    obj->VisitReferences(visitor, VoidFunctor());
   }
 
  private:
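
Every collector call site in this and the following files shrinks the same way: the <kVisitClass> (or <true>/<false>) template argument disappears, and sites that do not care about java.lang.ref referents keep passing a no-op functor. The no-op-functor idiom, reduced to a compilable sketch with stand-in types:

    #include <cstdio>

    // In the spirit of ART's VoidFunctor: an empty callable that the compiler
    // inlines away, for call sites that ignore one of the two visitors.
    struct VoidFunctor {
      template <typename... Args>
      void operator()(Args&&...) const {}
    };

    struct Object {
      template <typename Visitor, typename RefVisitor>
      void VisitReferences(const Visitor& visitor, const RefVisitor& ref_visitor) {
        visitor(this);      // ordinary reference slots
        ref_visitor(this);  // java.lang.ref referent; often deliberately ignored
      }
    };

    int main() {
      Object obj;
      obj.VisitReferences([](Object*) { std::puts("visited a reference"); },
                          VoidFunctor());  // as in obj->VisitReferences(visitor, VoidFunctor())
    }
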
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index b9f24f3..277d319 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -120,7 +120,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     RememberedSetReferenceVisitor visitor(target_space_, contains_reference_to_target_space_,
                                           collector_);
-    obj->VisitReferences<kMovingClasses>(visitor, visitor);
+    obj->VisitReferences(visitor, visitor);
   }
 
  private:
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 220c06e..263e678 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -683,7 +683,7 @@
     space::RegionSpace* region_space = collector->RegionSpace();
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
     ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor visitor(collector);
-    obj->VisitReferences<true>(visitor, visitor);
+    obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
       if (collector->RegionSpace()->IsInToSpace(obj)) {
         CHECK(obj->GetReadBarrierPointer() == nullptr)
@@ -808,7 +808,7 @@
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
     collector->AssertToSpaceInvariant(nullptr, MemberOffset(0), obj);
     ConcurrentCopyingAssertToSpaceInvariantFieldVisitor visitor(collector);
-    obj->VisitReferences<true>(visitor, visitor);
+    obj->VisitReferences(visitor, visitor);
   }
 
  private:
@@ -1546,7 +1546,7 @@
 void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
-  to_ref->VisitReferences<true>(visitor, visitor);
+  to_ref->VisitReferences(visitor, visitor);
 }
 
 // Process a field.
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 94ffe6e..60f833b 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -457,7 +457,7 @@
 
 void MarkCompact::UpdateObjectReferences(mirror::Object* obj) {
   UpdateReferenceVisitor visitor(this);
-  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+  obj->VisitReferences(visitor, visitor);
 }
 
 inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) {
@@ -608,7 +608,7 @@
 // Visit all of the references of an object and update.
 void MarkCompact::ScanObject(mirror::Object* obj) {
   MarkCompactMarkObjectVisitor visitor(this);
-  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+  obj->VisitReferences(visitor, visitor);
 }
 
 // Scan anything that's on the mark stack.
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 4e3845e..a3cc831 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -32,7 +32,7 @@
 inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
                                        const ReferenceVisitor& ref_visitor) {
   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
-  obj->VisitReferences<false>(visitor, ref_visitor);
+  obj->VisitReferences(visitor, ref_visitor);
   if (kCountScannedTypes) {
     mirror::Class* klass = obj->GetClass<kVerifyNone>();
     if (UNLIKELY(klass == mirror::Class::GetJavaLangClass())) {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index fc2a801..a355d40 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -320,7 +320,7 @@
 void SemiSpace::VerifyNoFromSpaceReferences(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceVerifyNoFromSpaceReferencesVisitor visitor(from_space_);
-  obj->VisitReferences<kMovingClasses>(visitor, VoidFunctor());
+  obj->VisitReferences(visitor, VoidFunctor());
 }
 
 class SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor {
@@ -722,7 +722,7 @@
 void SemiSpace::ScanObject(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceMarkObjectVisitor visitor(this);
-  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+  obj->VisitReferences(visitor, visitor);
 }
 
 // Scan anything that's on the mark stack.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e56351f..d7f918b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1795,7 +1795,7 @@
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
   void operator()(mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
-    o->VisitReferences<true>(*this, VoidFunctor());
+    o->VisitReferences(*this, VoidFunctor());
   }
 
   // For Object::VisitReferences.
@@ -2788,7 +2788,7 @@
     // be live or else how did we find it in the live bitmap?
     VerifyReferenceVisitor visitor(heap_, fail_count_, verify_referent_);
     // The class doesn't count as a reference but we should verify it anyway.
-    obj->VisitReferences<true>(visitor, visitor);
+    obj->VisitReferences(visitor, visitor);
   }
 
   static void VisitCallback(mirror::Object* obj, void* arg)
@@ -2969,7 +2969,7 @@
   void operator()(mirror::Object* obj) const
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    obj->VisitReferences<true>(visitor, VoidFunctor());
+    obj->VisitReferences(visitor, VoidFunctor());
   }
 
   bool Failed() const {
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 713797f..a9a236f 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1063,7 +1063,7 @@
   }
 
   GcRootVisitor visitor(this);
-  obj->VisitReferences<true>(visitor, VoidFunctor());
+  obj->VisitReferences(visitor, VoidFunctor());
 
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index ac9cb09..cd678f6 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -669,9 +669,9 @@
   return size;
 }
 
-template <bool kVisitClass, typename Visitor>
+template <typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  VisitInstanceFieldsReferences(klass, visitor);
   // Right after a class is allocated, but not yet loaded
   // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
   // and scan it. IsTemp() may call Class::GetAccessFlags() but may
@@ -683,7 +683,7 @@
     // Temp classes don't ever populate imt/vtable or static fields and they are not even
     // allocated with the right size for those. Also, unresolved classes don't have fields
     // linked yet.
-    VisitStaticFieldsReferences<kVisitClass>(this, visitor);
+    VisitStaticFieldsReferences(this, visitor);
   }
   // Since this class is reachable, we must also visit the associated roots when we scan it.
   VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index f20cc6e..055b3e5 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -872,8 +872,8 @@
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there are no from-space references in the native
     // roots.
-    h_new_class_obj->VisitReferences<true>(h_new_class_obj->GetClass(),
-                                           ReadBarrierOnNativeRootsVisitor());
+    static_cast<mirror::Object*>(h_new_class_obj.Get())->VisitReferences(
+        ReadBarrierOnNativeRootsVisitor(), VoidFunctor());
   }
 
  private:
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index dc60a38..3f375be 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1021,10 +1021,6 @@
   void SetPreverifiedFlagOnAllMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kVisitClass, typename Visitor>
-  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Get the descriptor of the class. In a few cases a std::string is required; rather than
   // always creating one, the storage argument is populated and its internal c_str() returned. We do
   // this to avoid memory allocation in the common case.
@@ -1153,6 +1149,10 @@
   static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
 
+  template <typename Visitor>
+  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1279,6 +1279,7 @@
   static GcRoot<Class> java_lang_Class_;
 
   friend struct art::ClassOffsets;  // for verifying offset information
+  friend class Object;  // For VisitReferences
   DISALLOW_IMPLICIT_CONSTRUCTORS(Class);
 };
 
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index 35f3664..e22ddd7 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -25,10 +25,10 @@
 namespace art {
 namespace mirror {
 
-template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags, typename Visitor>
+template <VerifyObjectFlags kVerifyFlags, typename Visitor>
 inline void ClassLoader::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit instance fields first.
-  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  VisitInstanceFieldsReferences(klass, visitor);
   // Visit classes loaded after.
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   ClassTable* const class_table = GetClassTable();
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 21c652a..f27b615 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -46,14 +46,15 @@
     SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, class_table_),
                       reinterpret_cast<uint64_t>(class_table));
   }
+
+ private:
   // Visit instance fields of the class loader as well as its associated classes.
   // Null class loader is handled by ClassLinker::VisitClassRoots.
-  template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags, typename Visitor>
+  template <VerifyObjectFlags kVerifyFlags, typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
- private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Object> packages_;
   HeapReference<ClassLoader> parent_;
@@ -63,6 +64,7 @@
   uint64_t class_table_;
 
   friend struct art::ClassLoaderOffsets;  // for verifying offset information
+  friend class Object;  // For VisitReferences
   DISALLOW_IMPLICIT_CONSTRUCTORS(ClassLoader);
 };
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 7b1660b..586ae30 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -942,13 +942,10 @@
   return success;
 }
 
-template<bool kVisitClass, bool kIsStatic, typename Visitor>
+template<bool kIsStatic, typename Visitor>
 inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
   if (!kIsStatic && (ref_offsets != mirror::Class::kClassWalkSuper)) {
     // Instance fields and not the slow-path.
-    if (kVisitClass) {
-      visitor(this, ClassOffset(), kIsStatic);
-    }
     uint32_t field_offset = mirror::kObjectHeaderSize;
     while (ref_offsets != 0) {
       if ((ref_offsets & 1) != 0) {
@@ -974,9 +971,9 @@
           ? klass->GetFirstReferenceStaticFieldOffset(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
           : klass->GetFirstReferenceInstanceFieldOffset();
-      for (size_t i = 0; i < num_reference_fields; ++i) {
+      for (size_t i = 0u; i < num_reference_fields; ++i) {
         // TODO: Do a simpler check?
-        if (kVisitClass || field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
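+        // Skip the class slot: Object::VisitReferences reports it exactly once, up front.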
+        if (field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
           visitor(this, field_offset, kIsStatic);
         }
         field_offset = MemberOffset(field_offset.Uint32Value() +
@@ -986,19 +983,17 @@
   }
 }
 
-template<bool kVisitClass, typename Visitor>
+template<typename Visitor>
 inline void Object::VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitFieldsReferences<kVisitClass, false>(
-      klass->GetReferenceInstanceOffsets<kVerifyNone>(), visitor);
+  VisitFieldsReferences<false>(klass->GetReferenceInstanceOffsets<kVerifyNone>(), visitor);
 }
 
-template<bool kVisitClass, typename Visitor>
+template<typename Visitor>
 inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
   DCHECK(!klass->IsTemp());
-  klass->VisitFieldsReferences<kVisitClass, true>(0, visitor);
+  klass->VisitFieldsReferences<true>(0, visitor);
 }
 
-
 template<VerifyObjectFlags kVerifyFlags>
 inline bool Object::IsClassLoader() {
   return GetClass<kVerifyFlags>()->IsClassLoaderClass();
@@ -1010,25 +1005,23 @@
   return down_cast<mirror::ClassLoader*>(this);
 }
 
-template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags, typename Visitor,
-    typename JavaLangRefVisitor>
+template <VerifyObjectFlags kVerifyFlags, typename Visitor, typename JavaLangRefVisitor>
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
   mirror::Class* klass = GetClass<kVerifyFlags>();
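+  // Report the class slot for every object, unconditionally; the shape-specific
+  // walks below skip ClassOffset() so it is never visited twice.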
+  visitor(this, ClassOffset(), false);
   if (klass == Class::GetJavaLangClass()) {
-    AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
+    AsClass<kVerifyNone>()->VisitReferences(klass, visitor);
   } else if (klass->IsArrayClass() || klass->IsStringClass()) {
     if (klass->IsObjectArrayClass<kVerifyNone>()) {
-      AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
-    } else if (kVisitClass) {
-      visitor(this, ClassOffset(), false);
+      AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences(visitor);
     }
   } else if (klass->IsClassLoaderClass()) {
     mirror::ClassLoader* class_loader = AsClassLoader<kVerifyFlags>();
-    class_loader->VisitReferences<kVisitClass, kVerifyFlags>(klass, visitor);
+    class_loader->VisitReferences<kVerifyFlags>(klass, visitor);
   } else {
     DCHECK(!klass->IsVariableSize());
-    VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+    VisitInstanceFieldsReferences(klass, visitor);
     if (UNLIKELY(klass->IsTypeOfReferenceClass<kVerifyNone>())) {
       ref_visitor(klass, AsReference());
     }
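
The rewritten Object::VisitReferences above is the semantic heart of this change: the class slot is reported first, unconditionally, and only the shape-specific walk (class, object array, class loader, plain instance) varies after that. A reduced model of the new control flow, with a toy type standing in for ART's mirror classes:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Obj {
      Obj* klass = nullptr;        // every object references its class
      std::vector<Obj*> fields;    // instance fields holding references

      template <typename Visitor>
      void VisitReferences(const Visitor& visitor) {
        visitor(this, "class slot");  // unconditional, shape-independent prologue
        // The per-shape walks (fields, array elements, class-loader table)
        // follow, and skip the class slot so it is never reported twice.
        for (std::size_t i = 0; i < fields.size(); ++i) {
          visitor(this, "field slot");
        }
      }
    };

    int main() {
      Obj klass;
      Obj o;
      o.klass = &klass;
      o.fields.push_back(&klass);
      o.VisitReferences([](Obj*, const char* what) { std::puts(what); });
    }

With the old API each caller chose, via kVisitClass, whether the class slot counted; now the contract is fixed: the class is a reference like any other and is visited exactly once, by Object::VisitReferences itself.
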
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index df680b5..4d94130 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -85,7 +85,7 @@
     // object above, copy references fields one by one again with a
     // RB. TODO: Optimize this later?
     CopyReferenceFieldsWithReadBarrierVisitor visitor(dest);
-    src->VisitReferences<true>(visitor, visitor);
+    src->VisitReferences(visitor, visitor);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Perform write barriers on copied object references.
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 4967a14..3cec29c 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -446,8 +446,9 @@
   }
   // TODO: Fix thread safety analysis broken by the use of templates. This should be
   // SHARED_REQUIRES(Locks::mutator_lock_).
-  template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-      typename Visitor, typename JavaLangRefVisitor = VoidFunctor>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            typename Visitor,
+            typename JavaLangRefVisitor = VoidFunctor>
   void VisitReferences(const Visitor& visitor, const JavaLangRefVisitor& ref_visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
@@ -481,13 +482,13 @@
   }
 
   // TODO: Fixme when annotalysis works with visitors.
-  template<bool kVisitClass, bool kIsStatic, typename Visitor>
+  template<bool kIsStatic, typename Visitor>
   void VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) HOT_ATTR
       NO_THREAD_SAFETY_ANALYSIS;
-  template<bool kVisitClass, typename Visitor>
+  template<typename Visitor>
   void VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
-  template<bool kVisitClass, typename Visitor>
+  template<typename Visitor>
   void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 4a7e7b3..5b73557 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -269,11 +269,8 @@
                       (i * sizeof(HeapReference<Object>)));
 }
 
-template<class T> template<const bool kVisitClass, typename Visitor>
+template<class T> template<typename Visitor>
 void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
-  if (kVisitClass) {
-    visitor(this, ClassOffset(), false);
-  }
   const size_t length = static_cast<size_t>(GetLength());
   for (size_t i = 0; i < length; ++i) {
     visitor(this, OffsetOfElement(i), false);
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 607b000..b45cafd 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -83,14 +83,15 @@
   ObjectArray<T>* CopyOf(Thread* self, int32_t new_length)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  // TODO fix thread safety analysis broken by the use of template. This should be
-  // SHARED_REQUIRES(Locks::mutator_lock_).
-  template<const bool kVisitClass, typename Visitor>
-  void VisitReferences(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS;
-
   static MemberOffset OffsetOfElement(int32_t i);
 
  private:
+  // TODO: Fix thread safety analysis broken by the use of templates. This should be
+  // SHARED_REQUIRES(Locks::mutator_lock_).
+  template<typename Visitor>
+  void VisitReferences(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS;
+
+  friend class Object;  // For VisitReferences
   DISALLOW_IMPLICIT_CONSTRUCTORS(ObjectArray);
 };
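
Class, ClassLoader, and ObjectArray all move their VisitReferences overloads behind private: and grant friend class Object, so Object::VisitReferences becomes the one public entry point that knows the dispatch rules. The encapsulation idiom in miniature:

    #include <cstdio>

    class Object;  // forward declaration for the friend grant below

    class ObjectArray {
     private:
      // The shape-specific walk, reachable only through Object.
      void VisitReferences() { std::puts("array elements"); }
      friend class Object;  // For VisitReferences
    };

    class Object {
     public:
      void VisitReferences(ObjectArray& as_array) {
        std::puts("class slot");   // the shared, unconditional part lives here
        as_array.VisitReferences();
      }
    };

    int main() {
      ObjectArray array;
      Object object;
      object.VisitReferences(array);
      // array.VisitReferences();  // would not compile: the walk is private
    }
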
 
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index 7acd950..d5b389f 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -448,9 +448,14 @@
                             thread.join();
                         } catch (InterruptedException e) {
                         }
-                        System.out.println("Thread exited for " + id + " with "
-                                           + (operationsPerThread - threadStress.nextOperation)
-                                           + " operations remaining.");
+                        try {
+                            System.out.println("Thread exited for " + id + " with "
+                                               + (operationsPerThread - threadStress.nextOperation)
+                                               + " operations remaining.");
+                        } catch (OutOfMemoryError e) {
+                            // Ignore OOME since we need to print "Finishing worker" for the test
+                            // to pass.
+                        }
                     }
                     System.out.println("Finishing worker");
                 }
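
The test fix above tolerates an allocation failure inside the status message (the string concatenation itself allocates, and under this stress load may throw OutOfMemoryError) so that the mandatory "Finishing worker" line still prints. The same guard shape in C++ terms, with std::bad_alloc standing in for Java's OutOfMemoryError:

    #include <cstdio>
    #include <new>
    #include <string>

    int main() {
      try {
        // The concatenation allocates and may throw under memory pressure.
        std::string msg = "Thread exited with " + std::to_string(3) +
                          " operations remaining.";
        std::puts(msg.c_str());
      } catch (const std::bad_alloc&) {
        // Swallow the failure: the line below must always print.
      }
      std::puts("Finishing worker");
    }
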
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 77c1a99..bd606a6 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -1043,8 +1043,20 @@
     return (r1 / i1) + (r2 / i2) + i3 + i4 + i5 + i6 + i7 + i8;
   }
 
+  public static boolean doThrow = false;
+
+  public static int $noinline$return_int_zero() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 0;
+  }
+
   public static void test_Integer_numberOfLeadingZeros() {
     Assert.assertEquals(Integer.numberOfLeadingZeros(0), Integer.SIZE);
+    Assert.assertEquals(Integer.numberOfLeadingZeros(1), Integer.SIZE - 1);
+    Assert.assertEquals(Integer.numberOfLeadingZeros(1 << (Integer.SIZE-1)), 0);
+    Assert.assertEquals(Integer.numberOfLeadingZeros($noinline$return_int_zero()), Integer.SIZE);
     for (int i = 0; i < Integer.SIZE; i++) {
         Assert.assertEquals(Integer.numberOfLeadingZeros(1 << i), Integer.SIZE - 1 - i);
         Assert.assertEquals(Integer.numberOfLeadingZeros((1 << i) | 1), Integer.SIZE - 1 - i);
@@ -1052,8 +1064,19 @@
     }
   }
 
+  public static long $noinline$return_long_zero() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 0;
+  }
+
   public static void test_Long_numberOfLeadingZeros() {
     Assert.assertEquals(Long.numberOfLeadingZeros(0L), Long.SIZE);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L), Long.SIZE - 1);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L << ((Long.SIZE/2)-1)), Long.SIZE/2);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L << (Long.SIZE-1)), 0);
+    Assert.assertEquals(Long.numberOfLeadingZeros($noinline$return_long_zero()), Long.SIZE);
     for (int i = 0; i < Long.SIZE; i++) {
         Assert.assertEquals(Long.numberOfLeadingZeros(1L << i), Long.SIZE - 1 - i);
         Assert.assertEquals(Long.numberOfLeadingZeros((1L << i) | 1L), Long.SIZE - 1 - i);
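
The new $noinline$return_int_zero / $noinline$return_long_zero helpers feed numberOfLeadingZeros an argument the compiler cannot constant-fold: the guarded throw keeps the optimizing compiler from inlining the helper, so the intrinsic must handle a genuine runtime zero instead of a literal. The same defeat-the-optimizer shape sketched in C++ (GCC/Clang builtin and attribute; the flag is never set):

    #include <cassert>
    #include <climits>

    static bool do_throw = false;  // never true; mirrors the Java tests' doThrow guard

    __attribute__((noinline)) static int return_int_zero() {
      if (do_throw) {
        throw 1;  // the possible throw discourages inlining, as in the Java tests
      }
      return 0;
    }

    int main() {
      // (1 | zero) is 1 at runtime but not a literal the optimizer can see through.
      int leading = __builtin_clz(1 | return_int_zero());
      assert(leading == static_cast<int>(sizeof(int) * CHAR_BIT) - 1);
      return 0;
    }
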