Merge "ART: Detached blocks should not be processed by compiler"
diff --git a/Android.mk b/Android.mk
index 3f4ead6..7a95dfe 100644
--- a/Android.mk
+++ b/Android.mk
@@ -324,7 +324,7 @@
 
 $$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD_DEPENDENCY)
 	@mkdir -p $$(dir $$@)
-	$(DEX2OATD) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) \
+	$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
 		--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
 		--dex-location=/$(1) --oat-file=$$@ \
 		--instruction-set=$(DEX2OAT_TARGET_ARCH) \
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 10cd1cc..3a19c40 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -110,6 +110,7 @@
   runtime/mem_map_test.cc \
   runtime/mirror/dex_cache_test.cc \
   runtime/mirror/object_test.cc \
+  runtime/monitor_pool_test.cc \
   runtime/monitor_test.cc \
   runtime/parsed_options_test.cc \
   runtime/reference_table_test.cc \
@@ -124,6 +125,7 @@
   runtime/jni_internal_test.cc \
   runtime/proxy_test.cc \
   runtime/reflection_test.cc \
+  compiler/dex/global_value_numbering_test.cc \
   compiler/dex/local_value_numbering_test.cc \
   compiler/dex/mir_optimization_test.cc \
   compiler/driver/compiler_driver_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index dd87f4a..61a2cde 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -29,7 +29,7 @@
 $$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "host dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
-	$$(hide) $$(DEX2OATD) --runtime-arg $(DEX2OAT_IMAGE_XMS) --runtime-arg $(DEX2OAT_IMAGE_XMX) \
+	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
 	  --image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(HOST_CORE_DEX_FILES)) \
 	  $$(addprefix --dex-location=,$$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)HOST_CORE_OAT_OUT) \
 	  --oat-location=$$($(1)HOST_CORE_OAT) --image=$$($(1)HOST_CORE_IMG_OUT) \
@@ -57,7 +57,7 @@
 $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "target dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
-	$$(hide) $$(DEX2OATD) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) \
+	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
 	  --image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(TARGET_CORE_DEX_FILES)) \
 	  $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \
 	  --oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e197c97..b469946 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -20,6 +20,7 @@
 
 LIBART_COMPILER_SRC_FILES := \
 	compiled_method.cc \
+	dex/global_value_numbering.cc \
 	dex/local_value_numbering.cc \
 	dex/quick/arm/assemble_arm.cc \
 	dex/quick/arm/call_arm.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 6eccb0e..d1d5ad9 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -71,26 +71,28 @@
 };
 
 /**
- * @class CallInlining
- * @brief Perform method inlining pass.
+ * @class SpecialMethodInliner
+ * @brief Performs the method inlining pass on special kinds of methods.
+ * @details Special methods are methods that fall into one of the following categories:
+ * empty, instance getter, instance setter, argument return, and constant return.
  */
-class CallInlining : public PassME {
+class SpecialMethodInliner : public PassME {
  public:
-  CallInlining() : PassME("CallInlining") {
+  SpecialMethodInliner() : PassME("SpecialMethodInliner") {
   }
 
   bool Gate(const PassDataHolder* data) const {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    return cUnit->mir_graph->InlineCallsGate();
+    return cUnit->mir_graph->InlineSpecialMethodsGate();
   }
 
   void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    cUnit->mir_graph->InlineCallsStart();
+    cUnit->mir_graph->InlineSpecialMethodsStart();
   }
 
   bool Worker(const PassDataHolder* data) const {
@@ -100,7 +102,7 @@
     DCHECK(cUnit != nullptr);
     BasicBlock* bb = pass_me_data_holder->bb;
     DCHECK(bb != nullptr);
-    cUnit->mir_graph->InlineCalls(bb);
+    cUnit->mir_graph->InlineSpecialMethods(bb);
     // No need to repeat, so just return false.
     return false;
   }
@@ -109,7 +111,7 @@
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    cUnit->mir_graph->InlineCallsEnd();
+    cUnit->mir_graph->InlineSpecialMethodsEnd();
   }
 };
 
@@ -199,8 +201,43 @@
 };
 
 /**
- * @class NullCheckEliminationAndTypeInference
- * @brief Null check elimination and type inference.
+ * @class GlobalValueNumberingPass
+ * @brief Performs the global value numbering pass.
+ */
+class GlobalValueNumberingPass : public PassME {
+ public:
+  GlobalValueNumberingPass()
+    : PassME("GVN", kRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
+  }
+
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
+    return cUnit->mir_graph->ApplyGlobalValueNumberingGate();
+  }
+
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
+    return cUnit->mir_graph->ApplyGlobalValueNumbering(bb);
+  }
+
+  void End(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
+    cUnit->mir_graph->ApplyGlobalValueNumberingEnd();
+  }
+};
+
+/**
+ * @class BBCombine
+ * @brief Perform the basic block combination pass.
  */
 class BBCombine : public PassME {
  public:
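
The GlobalValueNumberingPass added above is registered with a repeating topological-sort traversal, so the pass driver keeps invoking Worker() on basic blocks for as long as some Worker() call reports a change. The standalone sketch below illustrates that convergence contract with simplified stand-in types; Pass, BasicBlock and RunRepeating here are illustrative assumptions, not ART's actual PassDriverME machinery.

#include <cstdio>
#include <vector>

// Simplified stand-ins for ART's pass framework (illustration only).
struct BasicBlock { int id; bool visited; };

struct Pass {
  // Returns true if the block changed and the traversal must be repeated.
  virtual bool Worker(BasicBlock* bb) = 0;
  virtual ~Pass() {}
};

// A toy "repeating traversal": keep sweeping the blocks until a full sweep
// reports no change, mirroring kRepeatingTopologicalSortTraversal.
void RunRepeating(Pass* pass, std::vector<BasicBlock>& blocks) {
  bool changed;
  do {
    changed = false;
    for (BasicBlock& bb : blocks) {
      changed |= pass->Worker(&bb);
    }
  } while (changed);
}

// Example pass: marks blocks visited once; reports a change only on the first
// visit, so the loop runs exactly two sweeps before it settles.
struct MarkVisited : Pass {
  bool Worker(BasicBlock* bb) override {
    if (bb->visited) return false;
    bb->visited = true;
    return true;
  }
};

int main() {
  std::vector<BasicBlock> blocks = {{0, false}, {1, false}, {2, false}};
  MarkVisited pass;
  RunRepeating(&pass, blocks);
  std::printf("all blocks visited\n");
  return 0;
}

In the pass above, ApplyGlobalValueNumbering(bb) plays the role of Worker(): it returns true whenever the value numbers of bb changed, which forces another traversal.
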
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index dc6043d..f3ef796 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -45,6 +45,7 @@
   // (1 << kSuppressLoads) |
   // (1 << kNullCheckElimination) |
   // (1 << kClassInitCheckElimination) |
+  // (1 << kGlobalValueNumbering) |
   // (1 << kPromoteRegs) |
   // (1 << kTrackLiveTemps) |
   // (1 << kSafeOptimizations) |
@@ -78,65 +79,82 @@
   // (1 << kDebugCodegenDump) |
   0;
 
-CompilationUnit::CompilationUnit(ArenaPool* pool)
-  : compiler_driver(nullptr),
-    class_linker(nullptr),
-    dex_file(nullptr),
-    class_loader(nullptr),
-    class_def_idx(0),
-    method_idx(0),
-    code_item(nullptr),
-    access_flags(0),
-    invoke_type(kDirect),
-    shorty(nullptr),
-    disable_opt(0),
-    enable_debug(0),
-    verbose(false),
-    compiler(nullptr),
-    instruction_set(kNone),
-    target64(false),
-    num_dalvik_registers(0),
-    insns(nullptr),
-    num_ins(0),
-    num_outs(0),
-    num_regs(0),
-    compiler_flip_match(false),
-    arena(pool),
-    arena_stack(pool),
-    mir_graph(nullptr),
-    cg(nullptr),
-    timings("QuickCompiler", true, false),
-    print_pass(false) {
-}
+COMPILE_ASSERT(0U == static_cast<size_t>(kNone), kNone_not_0);
+COMPILE_ASSERT(1U == static_cast<size_t>(kArm), kArm_not_1);
+COMPILE_ASSERT(2U == static_cast<size_t>(kArm64), kArm64_not_2);
+COMPILE_ASSERT(3U == static_cast<size_t>(kThumb2), kThumb2_not_3);
+COMPILE_ASSERT(4U == static_cast<size_t>(kX86), kX86_not_4);
+COMPILE_ASSERT(5U == static_cast<size_t>(kX86_64), kX86_64_not_5);
+COMPILE_ASSERT(6U == static_cast<size_t>(kMips), kMips_not_6);
+COMPILE_ASSERT(7U == static_cast<size_t>(kMips64), kMips64_not_7);
 
-CompilationUnit::~CompilationUnit() {
-}
+// Additional disabled optimizations (beyond the generally disabled ones) per instruction set.
+static constexpr uint32_t kDisabledOptimizationsPerISA[] = {
+    // 0 = kNone.
+    ~0U,
+    // 1 = kArm, unused (will use kThumb2).
+    ~0U,
+    // 2 = kArm64.     TODO(Arm64): enable optimizations once backend is mature enough.
+    (1 << kLoadStoreElimination) |
+    (1 << kLoadHoisting) |
+    (1 << kBBOpt) |
+    0,
+    // 3 = kThumb2.
+    0,
+    // 4 = kX86.
+    0,
+    // 5 = kX86_64.
+    (1 << kLoadStoreElimination) |
+    0,
+    // 6 = kMips.
+    (1 << kLoadStoreElimination) |
+    (1 << kLoadHoisting) |
+    (1 << kSuppressLoads) |
+    (1 << kNullCheckElimination) |
+    (1 << kPromoteRegs) |
+    (1 << kTrackLiveTemps) |
+    (1 << kSafeOptimizations) |
+    (1 << kBBOpt) |
+    (1 << kMatch) |
+    (1 << kPromoteCompilerTemps) |
+    0,
+    // 7 = kMips64.
+    ~0U
+};
+COMPILE_ASSERT(sizeof(kDisabledOptimizationsPerISA) == 8 * sizeof(uint32_t), kDisabledOpts_unexp);
 
-void CompilationUnit::StartTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.StartTiming(label);
-  }
-}
+// Supported shorty types per instruction set. nullptr means that all are available.
+// Z : boolean
+// B : byte
+// S : short
+// C : char
+// I : int
+// J : long
+// F : float
+// D : double
+// L : reference(object, array)
+// V : void
+static const char* kSupportedTypes[] = {
+    // 0 = kNone.
+    "",
+    // 1 = kArm, unused (will use kThumb2).
+    "",
+    // 2 = kArm64.
+    nullptr,
+    // 3 = kThumb2.
+    nullptr,
+    // 4 = kX86.
+    nullptr,
+    // 5 = kX86_64.
+    nullptr,
+    // 6 = kMips.
+    nullptr,
+    // 7 = kMips64.
+    ""
+};
+COMPILE_ASSERT(sizeof(kSupportedTypes) == 8 * sizeof(char*), kSupportedTypes_unexp);
 
-void CompilationUnit::NewTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.EndTiming();
-    timings.StartTiming(label);
-  }
-}
-
-void CompilationUnit::EndTiming() {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.EndTiming();
-    if (enable_debug & (1 << kDebugTimings)) {
-      LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-      LOG(INFO) << Dumpable<TimingLogger>(timings);
-    }
-  }
-}
-
-// TODO: Remove this when we are able to compile everything.
-int arm64_support_list[] = {
+static int kAllOpcodes[] = {
     Instruction::NOP,
     Instruction::MOVE,
     Instruction::MOVE_FROM16,
@@ -410,305 +428,117 @@
     kMirOpSelect,
 };
 
-// TODO: Remove this when we are able to compile everything.
-int x86_64_support_list[] = {
-    Instruction::NOP,
-    // Instruction::MOVE,
-    // Instruction::MOVE_FROM16,
-    // Instruction::MOVE_16,
-    // Instruction::MOVE_WIDE,
-    // Instruction::MOVE_WIDE_FROM16,
-    // Instruction::MOVE_WIDE_16,
-    // Instruction::MOVE_OBJECT,
-    // Instruction::MOVE_OBJECT_FROM16,
-    // Instruction::MOVE_OBJECT_16,
-    // Instruction::MOVE_RESULT,
-    // Instruction::MOVE_RESULT_WIDE,
-    // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::MOVE_EXCEPTION,
-    Instruction::RETURN_VOID,
-    Instruction::RETURN,
-    // Instruction::RETURN_WIDE,
-    Instruction::RETURN_OBJECT,
-    // Instruction::CONST_4,
-    // Instruction::CONST_16,
-    // Instruction::CONST,
-    // Instruction::CONST_HIGH16,
-    // Instruction::CONST_WIDE_16,
-    // Instruction::CONST_WIDE_32,
-    // Instruction::CONST_WIDE,
-    // Instruction::CONST_WIDE_HIGH16,
-    // Instruction::CONST_STRING,
-    // Instruction::CONST_STRING_JUMBO,
-    // Instruction::CONST_CLASS,
-    // Instruction::MONITOR_ENTER,
-    // Instruction::MONITOR_EXIT,
-    // Instruction::CHECK_CAST,
-    // Instruction::INSTANCE_OF,
-    // Instruction::ARRAY_LENGTH,
-    // Instruction::NEW_INSTANCE,
-    // Instruction::NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY_RANGE,
-    // Instruction::FILL_ARRAY_DATA,
-    // Instruction::THROW,
-    // Instruction::GOTO,
-    // Instruction::GOTO_16,
-    // Instruction::GOTO_32,
-    // Instruction::PACKED_SWITCH,
-    // Instruction::SPARSE_SWITCH,
-    // Instruction::CMPL_FLOAT,
-    // Instruction::CMPG_FLOAT,
-    // Instruction::CMPL_DOUBLE,
-    // Instruction::CMPG_DOUBLE,
-    // Instruction::CMP_LONG,
-    // Instruction::IF_EQ,
-    // Instruction::IF_NE,
-    // Instruction::IF_LT,
-    // Instruction::IF_GE,
-    // Instruction::IF_GT,
-    // Instruction::IF_LE,
-    // Instruction::IF_EQZ,
-    // Instruction::IF_NEZ,
-    // Instruction::IF_LTZ,
-    // Instruction::IF_GEZ,
-    // Instruction::IF_GTZ,
-    // Instruction::IF_LEZ,
-    // Instruction::UNUSED_3E,
-    // Instruction::UNUSED_3F,
-    // Instruction::UNUSED_40,
-    // Instruction::UNUSED_41,
-    // Instruction::UNUSED_42,
-    // Instruction::UNUSED_43,
-    // Instruction::AGET,
-    // Instruction::AGET_WIDE,
-    // Instruction::AGET_OBJECT,
-    // Instruction::AGET_BOOLEAN,
-    // Instruction::AGET_BYTE,
-    // Instruction::AGET_CHAR,
-    // Instruction::AGET_SHORT,
-    // Instruction::APUT,
-    // Instruction::APUT_WIDE,
-    // Instruction::APUT_OBJECT,
-    // Instruction::APUT_BOOLEAN,
-    // Instruction::APUT_BYTE,
-    // Instruction::APUT_CHAR,
-    // Instruction::APUT_SHORT,
-    // Instruction::IGET,
-    // Instruction::IGET_WIDE,
-    // Instruction::IGET_OBJECT,
-    // Instruction::IGET_BOOLEAN,
-    // Instruction::IGET_BYTE,
-    // Instruction::IGET_CHAR,
-    // Instruction::IGET_SHORT,
-    // Instruction::IPUT,
-    // Instruction::IPUT_WIDE,
-    // Instruction::IPUT_OBJECT,
-    // Instruction::IPUT_BOOLEAN,
-    // Instruction::IPUT_BYTE,
-    // Instruction::IPUT_CHAR,
-    // Instruction::IPUT_SHORT,
-    Instruction::SGET,
-    // Instruction::SGET_WIDE,
-    Instruction::SGET_OBJECT,
-    Instruction::SGET_BOOLEAN,
-    Instruction::SGET_BYTE,
-    Instruction::SGET_CHAR,
-    Instruction::SGET_SHORT,
-    Instruction::SPUT,
-    // Instruction::SPUT_WIDE,
-    Instruction::SPUT_OBJECT,
-    Instruction::SPUT_BOOLEAN,
-    Instruction::SPUT_BYTE,
-    Instruction::SPUT_CHAR,
-    Instruction::SPUT_SHORT,
-    Instruction::INVOKE_VIRTUAL,
-    Instruction::INVOKE_SUPER,
-    Instruction::INVOKE_DIRECT,
-    Instruction::INVOKE_STATIC,
-    Instruction::INVOKE_INTERFACE,
-    // Instruction::RETURN_VOID_BARRIER,
-    // Instruction::INVOKE_VIRTUAL_RANGE,
-    // Instruction::INVOKE_SUPER_RANGE,
-    // Instruction::INVOKE_DIRECT_RANGE,
-    // Instruction::INVOKE_STATIC_RANGE,
-    // Instruction::INVOKE_INTERFACE_RANGE,
-    // Instruction::UNUSED_79,
-    // Instruction::UNUSED_7A,
-    // Instruction::NEG_INT,
-    // Instruction::NOT_INT,
-    // Instruction::NEG_LONG,
-    // Instruction::NOT_LONG,
-    // Instruction::NEG_FLOAT,
-    // Instruction::NEG_DOUBLE,
-    // Instruction::INT_TO_LONG,
-    // Instruction::INT_TO_FLOAT,
-    // Instruction::INT_TO_DOUBLE,
-    // Instruction::LONG_TO_INT,
-    // Instruction::LONG_TO_FLOAT,
-    // Instruction::LONG_TO_DOUBLE,
-    // Instruction::FLOAT_TO_INT,
-    // Instruction::FLOAT_TO_LONG,
-    // Instruction::FLOAT_TO_DOUBLE,
-    // Instruction::DOUBLE_TO_INT,
-    // Instruction::DOUBLE_TO_LONG,
-    // Instruction::DOUBLE_TO_FLOAT,
-    // Instruction::INT_TO_BYTE,
-    // Instruction::INT_TO_CHAR,
-    // Instruction::INT_TO_SHORT,
-    // Instruction::ADD_INT,
-    // Instruction::SUB_INT,
-    // Instruction::MUL_INT,
-    // Instruction::DIV_INT,
-    // Instruction::REM_INT,
-    // Instruction::AND_INT,
-    // Instruction::OR_INT,
-    // Instruction::XOR_INT,
-    // Instruction::SHL_INT,
-    // Instruction::SHR_INT,
-    // Instruction::USHR_INT,
-    // Instruction::ADD_LONG,
-    // Instruction::SUB_LONG,
-    // Instruction::MUL_LONG,
-    // Instruction::DIV_LONG,
-    // Instruction::REM_LONG,
-    // Instruction::AND_LONG,
-    // Instruction::OR_LONG,
-    // Instruction::XOR_LONG,
-    // Instruction::SHL_LONG,
-    // Instruction::SHR_LONG,
-    // Instruction::USHR_LONG,
-    // Instruction::ADD_FLOAT,
-    // Instruction::SUB_FLOAT,
-    // Instruction::MUL_FLOAT,
-    // Instruction::DIV_FLOAT,
-    // Instruction::REM_FLOAT,
-    // Instruction::ADD_DOUBLE,
-    // Instruction::SUB_DOUBLE,
-    // Instruction::MUL_DOUBLE,
-    // Instruction::DIV_DOUBLE,
-    // Instruction::REM_DOUBLE,
-    // Instruction::ADD_INT_2ADDR,
-    // Instruction::SUB_INT_2ADDR,
-    // Instruction::MUL_INT_2ADDR,
-    // Instruction::DIV_INT_2ADDR,
-    // Instruction::REM_INT_2ADDR,
-    // Instruction::AND_INT_2ADDR,
-    // Instruction::OR_INT_2ADDR,
-    // Instruction::XOR_INT_2ADDR,
-    // Instruction::SHL_INT_2ADDR,
-    // Instruction::SHR_INT_2ADDR,
-    // Instruction::USHR_INT_2ADDR,
-    // Instruction::ADD_LONG_2ADDR,
-    // Instruction::SUB_LONG_2ADDR,
-    // Instruction::MUL_LONG_2ADDR,
-    // Instruction::DIV_LONG_2ADDR,
-    // Instruction::REM_LONG_2ADDR,
-    // Instruction::AND_LONG_2ADDR,
-    // Instruction::OR_LONG_2ADDR,
-    // Instruction::XOR_LONG_2ADDR,
-    // Instruction::SHL_LONG_2ADDR,
-    // Instruction::SHR_LONG_2ADDR,
-    // Instruction::USHR_LONG_2ADDR,
-    // Instruction::ADD_FLOAT_2ADDR,
-    // Instruction::SUB_FLOAT_2ADDR,
-    // Instruction::MUL_FLOAT_2ADDR,
-    // Instruction::DIV_FLOAT_2ADDR,
-    // Instruction::REM_FLOAT_2ADDR,
-    // Instruction::ADD_DOUBLE_2ADDR,
-    // Instruction::SUB_DOUBLE_2ADDR,
-    // Instruction::MUL_DOUBLE_2ADDR,
-    // Instruction::DIV_DOUBLE_2ADDR,
-    // Instruction::REM_DOUBLE_2ADDR,
-    // Instruction::ADD_INT_LIT16,
-    // Instruction::RSUB_INT,
-    // Instruction::MUL_INT_LIT16,
-    // Instruction::DIV_INT_LIT16,
-    // Instruction::REM_INT_LIT16,
-    // Instruction::AND_INT_LIT16,
-    // Instruction::OR_INT_LIT16,
-    // Instruction::XOR_INT_LIT16,
-    // Instruction::ADD_INT_LIT8,
-    // Instruction::RSUB_INT_LIT8,
-    // Instruction::MUL_INT_LIT8,
-    // Instruction::DIV_INT_LIT8,
-    // Instruction::REM_INT_LIT8,
-    // Instruction::AND_INT_LIT8,
-    // Instruction::OR_INT_LIT8,
-    // Instruction::XOR_INT_LIT8,
-    // Instruction::SHL_INT_LIT8,
-    // Instruction::SHR_INT_LIT8,
-    // Instruction::USHR_INT_LIT8,
-    // Instruction::IGET_QUICK,
-    // Instruction::IGET_WIDE_QUICK,
-    // Instruction::IGET_OBJECT_QUICK,
-    // Instruction::IPUT_QUICK,
-    // Instruction::IPUT_WIDE_QUICK,
-    // Instruction::IPUT_OBJECT_QUICK,
-    // Instruction::INVOKE_VIRTUAL_QUICK,
-    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
-    // Instruction::UNUSED_EB,
-    // Instruction::UNUSED_EC,
-    // Instruction::UNUSED_ED,
-    // Instruction::UNUSED_EE,
-    // Instruction::UNUSED_EF,
-    // Instruction::UNUSED_F0,
-    // Instruction::UNUSED_F1,
-    // Instruction::UNUSED_F2,
-    // Instruction::UNUSED_F3,
-    // Instruction::UNUSED_F4,
-    // Instruction::UNUSED_F5,
-    // Instruction::UNUSED_F6,
-    // Instruction::UNUSED_F7,
-    // Instruction::UNUSED_F8,
-    // Instruction::UNUSED_F9,
-    // Instruction::UNUSED_FA,
-    // Instruction::UNUSED_FB,
-    // Instruction::UNUSED_FC,
-    // Instruction::UNUSED_FD,
-    // Instruction::UNUSED_FE,
-    // Instruction::UNUSED_FF,
-
-    // ----- ExtendedMIROpcode -----
-    // kMirOpPhi,
-    // kMirOpCopy,
-    // kMirOpFusedCmplFloat,
-    // kMirOpFusedCmpgFloat,
-    // kMirOpFusedCmplDouble,
-    // kMirOpFusedCmpgDouble,
-    // kMirOpFusedCmpLong,
-    // kMirOpNop,
-    // kMirOpNullCheck,
-    // kMirOpRangeCheck,
-    // kMirOpDivZeroCheck,
-    // kMirOpCheck,
-    // kMirOpCheckPart2,
-    // kMirOpSelect,
-    // kMirOpLast,
+// Unsupported opcodes. nullptr can be used when everything is supported. The sizes of the
+// lists are recorded in kUnsupportedOpcodesSize below.
+static const int* kUnsupportedOpcodes[] = {
+    // 0 = kNone.
+    kAllOpcodes,
+    // 1 = kArm, unused (will use kThumb2).
+    kAllOpcodes,
+    // 2 = kArm64.
+    nullptr,
+    // 3 = kThumb2.
+    nullptr,
+    // 4 = kX86.
+    nullptr,
+    // 5 = kX86_64.
+    nullptr,
+    // 6 = kMips.
+    nullptr,
+    // 7 = kMips64.
+    kAllOpcodes
 };
+COMPILE_ASSERT(sizeof(kUnsupportedOpcodes) == 8 * sizeof(int*), kUnsupportedOpcodes_unexp);
 
-// Z : boolean
-// B : byte
-// S : short
-// C : char
-// I : int
-// J : long
-// F : float
-// D : double
-// L : reference(object, array)
-// V : void
-// (ARM64) Current calling conversion only support 32bit softfp
-//         which has problems with long, float, double
-constexpr char arm64_supported_types[] = "ZBSCILVJFD";
-constexpr char x86_64_supported_types[] = "ZBSCILVJFD";
+// Sizes of the arrays stored above.
+static const size_t kUnsupportedOpcodesSize[] = {
+    // 0 = kNone.
+    arraysize(kAllOpcodes),
+    // 1 = kArm, unused (will use kThumb2).
+    arraysize(kAllOpcodes),
+    // 2 = kArm64.
+    0,
+    // 3 = kThumb2.
+    0,
+    // 4 = kX86.
+    0,
+    // 5 = kX86_64.
+    0,
+    // 6 = kMips.
+    0,
+    // 7 = kMips64.
+    arraysize(kAllOpcodes),
+};
+COMPILE_ASSERT(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
+               kUnsupportedOpcodesSize_unexp);
 
-// TODO: Remove this when we are able to compile everything.
+CompilationUnit::CompilationUnit(ArenaPool* pool)
+  : compiler_driver(nullptr),
+    class_linker(nullptr),
+    dex_file(nullptr),
+    class_loader(nullptr),
+    class_def_idx(0),
+    method_idx(0),
+    code_item(nullptr),
+    access_flags(0),
+    invoke_type(kDirect),
+    shorty(nullptr),
+    disable_opt(0),
+    enable_debug(0),
+    verbose(false),
+    compiler(nullptr),
+    instruction_set(kNone),
+    target64(false),
+    num_dalvik_registers(0),
+    insns(nullptr),
+    num_ins(0),
+    num_outs(0),
+    num_regs(0),
+    compiler_flip_match(false),
+    arena(pool),
+    arena_stack(pool),
+    mir_graph(nullptr),
+    cg(nullptr),
+    timings("QuickCompiler", true, false),
+    print_pass(false) {
+}
+
+CompilationUnit::~CompilationUnit() {
+}
+
+void CompilationUnit::StartTimingSplit(const char* label) {
+  if (compiler_driver->GetDumpPasses()) {
+    timings.StartTiming(label);
+  }
+}
+
+void CompilationUnit::NewTimingSplit(const char* label) {
+  if (compiler_driver->GetDumpPasses()) {
+    timings.EndTiming();
+    timings.StartTiming(label);
+  }
+}
+
+void CompilationUnit::EndTiming() {
+  if (compiler_driver->GetDumpPasses()) {
+    timings.EndTiming();
+    if (enable_debug & (1 << kDebugTimings)) {
+      LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
+      LOG(INFO) << Dumpable<TimingLogger>(timings);
+    }
+  }
+}
+
 static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
+  const char* supported_types = kSupportedTypes[instruction_set];
+  if (supported_types == nullptr) {
+    // Everything available.
+    return true;
+  }
+
   uint32_t shorty_size = strlen(shorty);
   CHECK_GE(shorty_size, 1u);
 
-  const char* supported_types =
-      (instruction_set == kX86_64) ? x86_64_supported_types : arm64_supported_types;
   for (uint32_t i = 0; i < shorty_size; i++) {
     if (strchr(supported_types, shorty[i]) == nullptr) {
       return false;
@@ -717,59 +547,57 @@
   return true;
 };
 
-// TODO: Remove this when we are able to compile everything.
 // Skip the method that we do not support currently.
 static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                              CompilationUnit& cu) {
-  // There is some limitation with current ARM 64 backend.
-  if (cu.instruction_set == kArm64) {
-    // Check if we can compile the prototype.
-    const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-    if (!CanCompileShorty(shorty, cu.instruction_set)) {
-      VLOG(compiler) << "Unsupported shorty : " << shorty;
-      return false;
-    }
+  // Check whether we do have limitations at all.
+  if (kSupportedTypes[cu.instruction_set] == nullptr &&
+      kUnsupportedOpcodesSize[cu.instruction_set] == 0U) {
+    return true;
+  }
 
-    const int *support_list = arm64_support_list;
-    int support_list_size = arraysize(arm64_support_list);
-    if (cu.instruction_set == kX86_64) {
-      support_list = x86_64_support_list;
-      support_list_size = arraysize(x86_64_support_list);
-    }
+  // Check if we can compile the prototype.
+  const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
+  if (!CanCompileShorty(shorty, cu.instruction_set)) {
+    VLOG(compiler) << "Unsupported shorty : " << shorty;
+    return false;
+  }
 
-    for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
-      BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
-      if (bb == NULL) continue;
-      if (bb->block_type == kDead) continue;
-      for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-        int opcode = mir->dalvikInsn.opcode;
-        // Check if we support the byte code.
-        if (std::find(support_list, support_list + support_list_size,
-            opcode) == support_list + support_list_size) {
-          if (!cu.mir_graph->IsPseudoMirOp(opcode)) {
-            VLOG(compiler) << "Unsupported dalvik byte code : "
-                           << mir->dalvikInsn.opcode;
-          } else {
-            VLOG(compiler) << "Unsupported extended MIR opcode : "
-                           << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
-          }
-          return false;
+  const int *unsupport_list = kUnsupportedOpcodes[cu.instruction_set];
+  int unsupport_list_size = kUnsupportedOpcodesSize[cu.instruction_set];
+
+  for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+    BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
+    if (bb == NULL) continue;
+    if (bb->block_type == kDead) continue;
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      int opcode = mir->dalvikInsn.opcode;
+      // Check if we support the byte code.
+      if (std::find(unsupport_list, unsupport_list + unsupport_list_size,
+                    opcode) != unsupport_list + unsupport_list_size) {
+        if (!cu.mir_graph->IsPseudoMirOp(opcode)) {
+          VLOG(compiler) << "Unsupported dalvik byte code : "
+              << mir->dalvikInsn.opcode;
+        } else {
+          VLOG(compiler) << "Unsupported extended MIR opcode : "
+              << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
         }
-        // Check if it invokes a prototype that we cannot support.
-        if (Instruction::INVOKE_VIRTUAL == opcode ||
-            Instruction::INVOKE_SUPER == opcode ||
-            Instruction::INVOKE_DIRECT == opcode ||
-            Instruction::INVOKE_STATIC == opcode ||
-            Instruction::INVOKE_INTERFACE == opcode) {
-          uint32_t invoke_method_idx = mir->dalvikInsn.vB;
-          const char* invoke_method_shorty = dex_file.GetMethodShorty(
-              dex_file.GetMethodId(invoke_method_idx));
-          if (!CanCompileShorty(invoke_method_shorty, cu.instruction_set)) {
-            VLOG(compiler) << "Unsupported to invoke '"
-                           << PrettyMethod(invoke_method_idx, dex_file)
-                           << "' with shorty : " << invoke_method_shorty;
-            return false;
-          }
+        return false;
+      }
+      // Check if it invokes a prototype that we cannot support.
+      if (Instruction::INVOKE_VIRTUAL == opcode ||
+          Instruction::INVOKE_SUPER == opcode ||
+          Instruction::INVOKE_DIRECT == opcode ||
+          Instruction::INVOKE_STATIC == opcode ||
+          Instruction::INVOKE_INTERFACE == opcode) {
+        uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+        const char* invoke_method_shorty = dex_file.GetMethodShorty(
+            dex_file.GetMethodId(invoke_method_idx));
+        if (!CanCompileShorty(invoke_method_shorty, cu.instruction_set)) {
+          VLOG(compiler) << "Unsupported to invoke '"
+              << PrettyMethod(invoke_method_idx, dex_file)
+              << "' with shorty : " << invoke_method_shorty;
+          return false;
         }
       }
     }
@@ -807,7 +635,7 @@
   }
   cu.target64 = Is64BitInstructionSet(cu.instruction_set);
   cu.compiler = compiler;
-  // TODO: x86_64 & arm64 are not yet implemented.
+  // TODO: Mips64 is not yet implemented.
   CHECK((cu.instruction_set == kThumb2) ||
         (cu.instruction_set == kArm64) ||
         (cu.instruction_set == kX86) ||
@@ -850,28 +678,8 @@
 
   compiler->InitCompilationUnit(cu);
 
-  if (cu.instruction_set == kMips) {
-    // Disable some optimizations for mips for now
-    cu.disable_opt |= (
-        (1 << kLoadStoreElimination) |
-        (1 << kLoadHoisting) |
-        (1 << kSuppressLoads) |
-        (1 << kNullCheckElimination) |
-        (1 << kPromoteRegs) |
-        (1 << kTrackLiveTemps) |
-        (1 << kSafeOptimizations) |
-        (1 << kBBOpt) |
-        (1 << kMatch) |
-        (1 << kPromoteCompilerTemps));
-  } else if (cu.instruction_set == kX86_64) {
-    // TODO(X86_64): enable optimizations once backend is mature enough.
-    cu.disable_opt |= (1 << kLoadStoreElimination);
-  } else if (cu.instruction_set == kArm64) {
-    // TODO(Arm64): enable optimizations once backend is mature enough.
-    cu.disable_opt = ~((1 << kSuppressMethodInlining) |
-                       (1 << kNullCheckElimination) |
-                       (1 << kPromoteRegs));
-  }
+  // Disable optimizations according to instruction set.
+  cu.disable_opt |= kDisabledOptimizationsPerISA[cu.instruction_set];
 
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
@@ -892,7 +700,6 @@
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
-  // TODO(Arm64): Remove this when we are able to compile everything.
   if (!CanCompileMethod(method_idx, dex_file, cu)) {
     VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : " << method_name;
     return nullptr;
@@ -995,7 +802,8 @@
                           uint32_t access_flags, art::InvokeType invoke_type,
                           uint16_t class_def_idx, uint32_t method_idx, jobject class_loader,
                           const art::DexFile& dex_file) {
-  // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use build default
+  // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use
+  // build default.
   art::Compiler* compiler = driver.GetCompiler();
   return art::CompileOneMethod(driver, compiler, code_item, access_flags, invoke_type,
                                class_def_idx, method_idx, class_loader, dex_file,
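
The frontend.cc rewrite above replaces per-ISA if/else chains with constant tables indexed by the InstructionSet enum value: kDisabledOptimizationsPerISA for extra disabled optimizations, kSupportedTypes for shorty restrictions, and kUnsupportedOpcodes/kUnsupportedOpcodesSize for opcode restrictions. The sketch below shows the same table-driven pattern in miniature; the ISA names, flag bits and shorty strings are made up for illustration and do not match ART's real values.

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical ISA enum and optimization bits, for illustration only.
enum Isa { kIsaNone = 0, kIsaA = 1, kIsaB = 2, kIsaCount = 3 };
enum OptBit { kOptLoadStoreElim = 0, kOptBBOpt = 1 };

// Per-ISA extra disabled optimizations, indexed directly by the enum value.
static const uint32_t kDisabledOptsPerIsa[kIsaCount] = {
    ~0u,                        // kIsaNone: compile nothing.
    (1u << kOptLoadStoreElim),  // kIsaA: immature backend.
    0u,                         // kIsaB: everything enabled.
};

// Per-ISA supported shorty characters; nullptr means "all types supported".
static const char* kSupportedShortyPerIsa[kIsaCount] = { "", "ZILV", nullptr };

static bool CanCompileShorty(const char* shorty, Isa isa) {
  const char* supported = kSupportedShortyPerIsa[isa];
  if (supported == nullptr) {
    return true;  // No restrictions for this ISA.
  }
  for (const char* p = shorty; *p != '\0'; ++p) {
    if (std::strchr(supported, *p) == nullptr) {
      return false;
    }
  }
  return true;
}

int main() {
  uint32_t disable_opt = 0u;
  disable_opt |= kDisabledOptsPerIsa[kIsaA];  // Same pattern as cu.disable_opt |= ...
  assert((disable_opt & (1u << kOptLoadStoreElim)) != 0u);
  assert(CanCompileShorty("ILZ", kIsaA));
  assert(!CanCompileShorty("D", kIsaA));      // Double not in "ZILV".
  assert(CanCompileShorty("D", kIsaB));       // nullptr => everything supported.
  return 0;
}
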
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index 9e376ee..f4cbdfb 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -45,6 +45,7 @@
   kSuppressLoads,
   kNullCheckElimination,
   kClassInitCheckElimination,
+  kGlobalValueNumbering,
   kPromoteRegs,
   kTrackLiveTemps,
   kSafeOptimizations,
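
frontend.h only grows the optimization-control enum by one entry, kGlobalValueNumbering. As with the neighbouring entries, the value is used as a bit position: frontend.cc can disable the pass by setting (1 << kGlobalValueNumbering) in disable_opt, and a gate runs the pass only when the bit is clear. A minimal sketch of that convention follows; the enum excerpt is truncated and its positions are illustrative, not ART's actual values.

#include <cassert>
#include <cstdint>

// Truncated, illustrative excerpt of an optimization-control enum
// (bit positions, not masks; real positions differ).
enum OptControlVector {
  kClassInitCheckElimination,
  kGlobalValueNumbering,  // Newly added flag.
  kPromoteRegs,
};

// Typical gate: run the pass only when its bit is NOT set in disable_opt.
static bool GlobalValueNumberingGate(uint32_t disable_opt) {
  return (disable_opt & (1u << kGlobalValueNumbering)) == 0u;
}

int main() {
  assert(GlobalValueNumberingGate(0u));                            // Enabled by default.
  assert(!GlobalValueNumberingGate(1u << kGlobalValueNumbering));  // Explicitly disabled.
  return 0;
}
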
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
new file mode 100644
index 0000000..614e826
--- /dev/null
+++ b/compiler/dex/global_value_numbering.cc
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "global_value_numbering.h"
+
+#include "local_value_numbering.h"
+
+namespace art {
+
+GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator)
+    : cu_(cu),
+      allocator_(allocator),
+      repeat_count_(0u),
+      last_value_(0u),
+      modifications_allowed_(false),
+      global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
+      field_index_map_(FieldReferenceComparator(), allocator->Adapter()),
+      field_index_reverse_map_(allocator->Adapter()),
+      array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
+      array_location_reverse_map_(allocator->Adapter()),
+      ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
+      lvns_(cu_->mir_graph->GetNumBlocks(), nullptr, allocator->Adapter()),
+      work_lvn_(nullptr),
+      merge_lvns_(allocator->Adapter()) {
+  cu_->mir_graph->ClearAllVisitedFlags();
+}
+
+GlobalValueNumbering::~GlobalValueNumbering() {
+  STLDeleteElements(&lvns_);
+}
+
+LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb) {
+  if (UNLIKELY(!Good())) {
+    return nullptr;
+  }
+  if (bb->data_flow_info == nullptr) {
+    return nullptr;
+  }
+  if (bb->block_type == kEntryBlock) {
+    repeat_count_ += 1u;
+    if (repeat_count_ > kMaxRepeatCount) {
+      last_value_ = kNoValue;  // Make bad.
+      return nullptr;
+    }
+  }
+  if (bb->block_type == kExitBlock) {
+    DCHECK(bb->first_mir_insn == nullptr);
+    return nullptr;
+  }
+  if (bb->visited) {
+    return nullptr;
+  }
+  DCHECK(work_lvn_.get() == nullptr);
+  work_lvn_.reset(new (allocator_) LocalValueNumbering(this, bb->id));
+  if (bb->block_type == kEntryBlock) {
+    if ((cu_->access_flags & kAccStatic) == 0) {
+      // If non-static method, mark "this" as non-null
+      int this_reg = cu_->num_dalvik_registers - cu_->num_ins;
+      work_lvn_->SetSRegNullChecked(this_reg);
+    }
+  } else {
+    // Merge all incoming arcs.
+    // To avoid repeated allocation on the ArenaStack, reuse a single vector kept as a member.
+    DCHECK(merge_lvns_.empty());
+    GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
+    for (BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next());
+         pred_bb != nullptr; pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next())) {
+      if (lvns_[pred_bb->id] != nullptr) {
+        merge_lvns_.push_back(lvns_[pred_bb->id]);
+      }
+    }
+    // Determine merge type.
+    LocalValueNumbering::MergeType merge_type = LocalValueNumbering::kNormalMerge;
+    if (bb->catch_entry) {
+      merge_type = LocalValueNumbering::kCatchMerge;
+    } else if (bb->last_mir_insn != nullptr &&
+        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
+         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT ||
+         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) &&
+        (bb->first_mir_insn == bb->last_mir_insn ||
+         (bb->first_mir_insn->next == bb->last_mir_insn &&
+          static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi))) {
+      merge_type = LocalValueNumbering::kReturnMerge;
+    }
+    // At least one predecessor must have been processed before this bb.
+    CHECK(!merge_lvns_.empty());
+    if (merge_lvns_.size() == 1u) {
+      work_lvn_->MergeOne(*merge_lvns_[0], merge_type);
+      BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(merge_lvns_[0]->Id());
+      if (HasNullCheckLastInsn(pred_bb, bb->id)) {
+        work_lvn_->SetSRegNullChecked(pred_bb->last_mir_insn->ssa_rep->uses[0]);
+      }
+    } else {
+      work_lvn_->Merge(merge_type);
+    }
+  }
+  return work_lvn_.get();
+}
+
+bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) {
+  DCHECK(work_lvn_ != nullptr);
+  DCHECK(bb->id == work_lvn_->Id());
+  merge_lvns_.clear();
+
+  bool change = false;
+  // Look for a branch to self or an already processed child.
+  // (No need to repeat the LVN if all children are processed later.)
+  ChildBlockIterator iter(bb, cu_->mir_graph.get());
+  for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
+    if (child == bb || lvns_[child->id] != nullptr) {
+      // If we found an already processed child, check if the LVN actually differs.
+      change = (lvns_[bb->id] == nullptr || !lvns_[bb->id]->Equals(*work_lvn_));
+      break;
+    }
+  }
+
+  std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
+  lvns_[bb->id] = work_lvn_.release();
+
+  bb->visited = true;
+  if (change) {
+    ChildBlockIterator iter(bb, cu_->mir_graph.get());
+    for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
+      child->visited = false;
+    }
+  }
+  return change;
+}
+
+uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) {
+  FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex(), type };
+  auto lb = field_index_map_.lower_bound(key);
+  if (lb != field_index_map_.end() && !field_index_map_.key_comp()(key, lb->first)) {
+    return lb->second;
+  }
+  DCHECK_LT(field_index_map_.size(), kNoValue);
+  uint16_t id = field_index_map_.size();
+  auto it = field_index_map_.PutBefore(lb, key, id);
+  field_index_reverse_map_.push_back(&*it);
+  return id;
+}
+
+uint16_t GlobalValueNumbering::GetArrayLocation(uint16_t base, uint16_t index) {
+  auto cmp = array_location_map_.key_comp();
+  ArrayLocation key = { base, index };
+  auto lb = array_location_map_.lower_bound(key);
+  if (lb != array_location_map_.end() && !cmp(key, lb->first)) {
+    return lb->second;
+  }
+  uint16_t location = static_cast<uint16_t>(array_location_reverse_map_.size());
+  DCHECK_EQ(location, array_location_reverse_map_.size());  // No overflow.
+  auto it = array_location_map_.PutBefore(lb, key, location);
+  array_location_reverse_map_.push_back(&*it);
+  return location;
+}
+
+bool GlobalValueNumbering::HasNullCheckLastInsn(const BasicBlock* pred_bb,
+                                                BasicBlockId succ_id) {
+  if (pred_bb->block_type != kDalvikByteCode || pred_bb->last_mir_insn == nullptr) {
+    return false;
+  }
+  Instruction::Code last_opcode = pred_bb->last_mir_insn->dalvikInsn.opcode;
+  return ((last_opcode == Instruction::IF_EQZ && pred_bb->fall_through == succ_id) ||
+      (last_opcode == Instruction::IF_NEZ && pred_bb->taken == succ_id));
+}
+
+bool GlobalValueNumbering::NullCheckedInAllPredecessors(
+    const ScopedArenaVector<uint16_t>& merge_names) const {
+  // Implicit parameters:
+  //   - *work_lvn: the LVN for which we're checking predecessors.
+  //   - merge_lvns_: the predecessor LVNs.
+  DCHECK_EQ(merge_lvns_.size(), merge_names.size());
+  for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
+    const LocalValueNumbering* pred_lvn = merge_lvns_[i];
+    uint16_t value_name = merge_names[i];
+    if (!pred_lvn->IsValueNullChecked(value_name)) {
+      // Check if the predecessor has an IF_EQZ/IF_NEZ as the last insn.
+      const BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(pred_lvn->Id());
+      if (!HasNullCheckLastInsn(pred_bb, work_lvn_->Id())) {
+        return false;
+      }
+      // IF_EQZ/IF_NEZ checks some sreg, see if that sreg contains the value_name.
+      int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0];
+      if (!pred_lvn->IsSregValue(s_reg, value_name)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace art
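
PrepareBasicBlock() and FinishBasicBlock() above implement a forward dataflow fixed point: a block's incoming state is merged from its already-processed predecessors, and whenever a block's outgoing LVN changes, its children have their visited flags cleared so they are processed again, with repeat_count_/kMaxRepeatCount acting as a safety valve against non-convergence. The sketch below shows the same scheme on a toy lattice (sets of "known non-null" registers merged by intersection); the Block type, Intersect() and RunToFixedPoint() are illustrative assumptions, not the actual GVN/LVN data structures.

#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

// Toy CFG node: predecessors by index, plus the dataflow fact computed for it
// (the set of "known non-null" registers at the end of the block).
struct Block {
  std::vector<size_t> preds;
  std::set<int> gen;   // Registers this block itself null-checks.
  std::set<int> out;   // Computed facts.
  bool computed = false;
};

static std::set<int> Intersect(const std::set<int>& a, const std::set<int>& b) {
  std::set<int> r;
  for (int v : a) {
    if (b.count(v) != 0) r.insert(v);
  }
  return r;
}

// Iterate to a fixed point, bailing out after a bounded number of full passes,
// analogous to GlobalValueNumbering's repeat_count_ / kMaxRepeatCount guard.
static bool RunToFixedPoint(std::vector<Block>& blocks, size_t max_repeats) {
  for (size_t pass = 0; pass != max_repeats; ++pass) {
    bool changed = false;
    for (Block& bb : blocks) {
      // Merge processed predecessors (intersection); entry blocks start empty.
      std::set<int> in;
      bool first = true;
      for (size_t p : bb.preds) {
        if (!blocks[p].computed) continue;
        in = first ? blocks[p].out : Intersect(in, blocks[p].out);
        first = false;
      }
      std::set<int> out = in;
      out.insert(bb.gen.begin(), bb.gen.end());
      if (!bb.computed || out != bb.out) {
        bb.out = out;
        bb.computed = true;
        changed = true;
      }
    }
    if (!changed) return true;  // Converged.
  }
  return false;  // Did not converge within the cap; caller should give up.
}

int main() {
  // 0 -> 1 -> 2, plus a back edge 2 -> 1 (a simple loop).
  std::vector<Block> blocks(3);
  blocks[1].preds = {0, 2};
  blocks[2].preds = {1};
  blocks[0].gen = {10};
  blocks[1].gen = {11};
  assert(RunToFixedPoint(blocks, 10));
  assert(blocks[2].out.count(10) == 1 && blocks[2].out.count(11) == 1);
  return 0;
}
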
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
new file mode 100644
index 0000000..7ab77b7
--- /dev/null
+++ b/compiler/dex/global_value_numbering.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
+#define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
+
+#include "base/macros.h"
+#include "compiler_internals.h"
+#include "utils/scoped_arena_containers.h"
+
+namespace art {
+
+class LocalValueNumbering;
+class MirFieldInfo;
+
+class GlobalValueNumbering {
+ public:
+  GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator);
+  ~GlobalValueNumbering();
+
+  LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb);
+  bool FinishBasicBlock(BasicBlock* bb);
+
+  // Checks that the value names didn't overflow.
+  bool Good() const {
+    return last_value_ < kNoValue;
+  }
+
+  // Allow modifications.
+  void AllowModifications() {
+    DCHECK(Good());
+    cu_->mir_graph->ClearAllVisitedFlags();
+    modifications_allowed_ = true;
+  }
+
+  bool CanModify() const {
+    // TODO: DCHECK(Good()), see AllowModifications() and NewValueName().
+    return modifications_allowed_ && Good();
+  }
+
+  // GlobalValueNumbering should be allocated on the ArenaStack (or the native stack).
+  static void* operator new(size_t size, ScopedArenaAllocator* allocator) {
+    return allocator->Alloc(sizeof(GlobalValueNumbering), kArenaAllocMIR);
+  }
+
+  // Allow delete-expression to destroy a GlobalValueNumbering object without deallocation.
+  static void operator delete(void* ptr) { UNUSED(ptr); }
+
+ private:
+  static constexpr uint16_t kNoValue = 0xffffu;
+
+  // Allocate a new value name.
+  uint16_t NewValueName() {
+    // TODO: No new values should be needed once we allow modifications.
+    // DCHECK(!modifications_allowed_);
+    ++last_value_;
+    return last_value_;
+  }
+
+  // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name.
+  typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap;
+
+  static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
+    return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 |
+            static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier));
+  };
+
+  // Look up a value in the global value map, adding a new entry if there was none before.
+  uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
+    uint16_t res;
+    uint64_t key = BuildKey(op, operand1, operand2, modifier);
+    ValueMap::iterator lb = global_value_map_.lower_bound(key);
+    if (lb != global_value_map_.end() && lb->first == key) {
+      res = lb->second;
+    } else {
+      res = NewValueName();
+      global_value_map_.PutBefore(lb, key, res);
+    }
+    return res;
+  };
+
+  // Check if the exact value is stored in the global value map.
+  bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier,
+                uint16_t value) const {
+    DCHECK(value != 0u || !Good());
+    DCHECK_LE(value, last_value_);
+    // This is equivalent to value == LookupValue(op, operand1, operand2, modifier)
+    // except that it doesn't add an entry to the global value map if it's not there.
+    uint64_t key = BuildKey(op, operand1, operand2, modifier);
+    ValueMap::const_iterator it = global_value_map_.find(key);
+    return (it != global_value_map_.end() && it->second == value);
+  };
+
+  // FieldReference represents a unique resolved field.
+  struct FieldReference {
+    const DexFile* dex_file;
+    uint16_t field_idx;
+    uint16_t type;  // See comments for LocalValueNumbering::kFieldTypeCount.
+  };
+
+  struct FieldReferenceComparator {
+    bool operator()(const FieldReference& lhs, const FieldReference& rhs) const {
+      if (lhs.field_idx != rhs.field_idx) {
+        return lhs.field_idx < rhs.field_idx;
+      }
+      // If the field_idx and dex_file match, the type must also match.
+      DCHECK(lhs.dex_file != rhs.dex_file || lhs.type == rhs.type);
+      return lhs.dex_file < rhs.dex_file;
+    }
+  };
+
+  // Maps field key to field id for resolved fields.
+  typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap;
+
+  // Get a field id.
+  uint16_t GetFieldId(const MirFieldInfo& field_info, uint16_t type);
+
+  // Get a field type based on field id.
+  uint16_t GetFieldType(uint16_t field_id) {
+    DCHECK_LT(field_id, field_index_reverse_map_.size());
+    return field_index_reverse_map_[field_id]->first.type;
+  }
+
+  struct ArrayLocation {
+    uint16_t base;
+    uint16_t index;
+  };
+
+  struct ArrayLocationComparator {
+    bool operator()(const ArrayLocation& lhs, const ArrayLocation& rhs) const {
+      if (lhs.base != rhs.base) {
+        return lhs.base < rhs.base;
+      }
+      return lhs.index < rhs.index;
+    }
+  };
+
+  typedef ScopedArenaSafeMap<ArrayLocation, uint16_t, ArrayLocationComparator> ArrayLocationMap;
+
+  // Get an array location.
+  uint16_t GetArrayLocation(uint16_t base, uint16_t index);
+
+  // Get the array base from an array location.
+  uint16_t GetArrayLocationBase(uint16_t location) const {
+    return array_location_reverse_map_[location]->first.base;
+  }
+
+  // Get the array index from an array location.
+  uint16_t GetArrayLocationIndex(uint16_t location) const {
+    return array_location_reverse_map_[location]->first.index;
+  }
+
+  // A set of value names.
+  typedef ScopedArenaSet<uint16_t> ValueNameSet;
+
+  // A map from a set of references to the set id.
+  typedef ScopedArenaSafeMap<ValueNameSet, uint16_t> RefSetIdMap;
+
+  uint16_t GetRefSetId(const ValueNameSet& ref_set) {
+    uint16_t res = kNoValue;
+    auto lb = ref_set_map_.lower_bound(ref_set);
+    if (lb != ref_set_map_.end() && !ref_set_map_.key_comp()(ref_set, lb->first)) {
+      res = lb->second;
+    } else {
+      res = NewValueName();
+      ref_set_map_.PutBefore(lb, ref_set, res);
+    }
+    return res;
+  }
+
+  const BasicBlock* GetBasicBlock(uint16_t bb_id) const {
+    return cu_->mir_graph->GetBasicBlock(bb_id);
+  }
+
+  static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id);
+
+  bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
+
+  CompilationUnit* GetCompilationUnit() const {
+    return cu_;
+  }
+
+  MIRGraph* GetMirGraph() const {
+    return cu_->mir_graph.get();
+  }
+
+  ScopedArenaAllocator* Allocator() const {
+    return allocator_;
+  }
+
+  CompilationUnit* const cu_;
+  ScopedArenaAllocator* const allocator_;
+
+  static constexpr uint32_t kMaxRepeatCount = 10u;
+
+  // Track the repeat count to make sure the GVN converges quickly and abort the GVN otherwise.
+  uint32_t repeat_count_;
+
+  // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good().
+  // We usually don't check Good() until the end of LVN unless we're about to modify code.
+  uint32_t last_value_;
+
+  // Marks whether code modifications are allowed. The initial GVN is done without code
+  // modifications to settle the value names. Afterwards, we allow modifications and rerun
+  // LVN once for each BasicBlock.
+  bool modifications_allowed_;
+
+  ValueMap global_value_map_;
+  FieldIndexMap field_index_map_;
+  ScopedArenaVector<const FieldIndexMap::value_type*> field_index_reverse_map_;
+  ArrayLocationMap array_location_map_;
+  ScopedArenaVector<const ArrayLocationMap::value_type*> array_location_reverse_map_;
+  RefSetIdMap ref_set_map_;
+
+  ScopedArenaVector<const LocalValueNumbering*> lvns_;        // Owning.
+  std::unique_ptr<LocalValueNumbering> work_lvn_;
+  ScopedArenaVector<const LocalValueNumbering*> merge_lvns_;  // Not owning.
+
+  friend class LocalValueNumbering;
+
+  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumbering);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
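
BuildKey() above packs (op, operand1, operand2, modifier) into a single 64-bit map key by giving each 16-bit component its own slice of the key, which is also why value names must stay below kNoValue = 0xffff so they fit back into those 16-bit slots (Good() checks exactly that). The small standalone check below mirrors the packing; the test values are arbitrary.

#include <cassert>
#include <cstdint>

// Same packing as GlobalValueNumbering::BuildKey(): four 16-bit fields in one 64-bit key.
static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
  return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 |
          static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier));
}

int main() {
  // Each component occupies its own 16-bit slice, so two keys collide only when
  // all four components are equal.
  uint64_t k = BuildKey(0x1234u, 0xabcdu, 0x00ffu, 0x0001u);
  assert(k == 0x1234abcd00ff0001ULL);
  assert(BuildKey(1, 2, 3, 4) != BuildKey(1, 2, 4, 3));
  return 0;
}
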
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
new file mode 100644
index 0000000..40bd983
--- /dev/null
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -0,0 +1,2093 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler_internals.h"
+#include "dataflow_iterator.h"
+#include "dataflow_iterator-inl.h"
+#include "global_value_numbering.h"
+#include "local_value_numbering.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class GlobalValueNumberingTest : public testing::Test {
+ protected:
+  struct IFieldDef {
+    uint16_t field_idx;
+    uintptr_t declaring_dex_file;
+    uint16_t declaring_field_idx;
+    bool is_volatile;
+  };
+
+  struct SFieldDef {
+    uint16_t field_idx;
+    uintptr_t declaring_dex_file;
+    uint16_t declaring_field_idx;
+    bool is_volatile;
+  };
+
+  struct BBDef {
+    static constexpr size_t kMaxSuccessors = 4;
+    static constexpr size_t kMaxPredecessors = 4;
+
+    BBType type;
+    size_t num_successors;
+    BasicBlockId successors[kMaxSuccessors];
+    size_t num_predecessors;
+    BasicBlockId predecessors[kMaxPredecessors];
+  };
+
+  struct MIRDef {
+    static constexpr size_t kMaxSsaDefs = 2;
+    static constexpr size_t kMaxSsaUses = 4;
+
+    BasicBlockId bbid;
+    Instruction::Code opcode;
+    int64_t value;
+    uint32_t field_info;
+    size_t num_uses;
+    int32_t uses[kMaxSsaUses];
+    size_t num_defs;
+    int32_t defs[kMaxSsaDefs];
+  };
+
+#define DEF_SUCC0() \
+    0u, { }
+#define DEF_SUCC1(s1) \
+    1u, { s1 }
+#define DEF_SUCC2(s1, s2) \
+    2u, { s1, s2 }
+#define DEF_SUCC3(s1, s2, s3) \
+    3u, { s1, s2, s3 }
+#define DEF_SUCC4(s1, s2, s3, s4) \
+    4u, { s1, s2, s3, s4 }
+#define DEF_PRED0() \
+    0u, { }
+#define DEF_PRED1(p1) \
+    1u, { p1 }
+#define DEF_PRED2(p1, p2) \
+    2u, { p1, p2 }
+#define DEF_PRED3(p1, p2, p3) \
+    3u, { p1, p2, p3 }
+#define DEF_PRED4(p1, p2, p3, p4) \
+    4u, { p1, p2, p3, p4 }
+#define DEF_BB(type, succ, pred) \
+    { type, succ, pred }
+
+#define DEF_CONST(bb, opcode, reg, value) \
+    { bb, opcode, value, 0u, 0, { }, 1, { reg } }
+#define DEF_CONST_WIDE(bb, opcode, reg, value) \
+    { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
+#define DEF_CONST_STRING(bb, opcode, reg, index) \
+    { bb, opcode, index, 0u, 0, { }, 1, { reg } }
+#define DEF_IGET(bb, opcode, reg, obj, field_info) \
+    { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } }
+#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \
+    { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
+#define DEF_IPUT(bb, opcode, reg, obj, field_info) \
+    { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
+#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \
+    { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
+#define DEF_SGET(bb, opcode, reg, field_info) \
+    { bb, opcode, 0u, field_info, 0, { }, 1, { reg } }
+#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \
+    { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
+#define DEF_SPUT(bb, opcode, reg, field_info) \
+    { bb, opcode, 0u, field_info, 1, { reg }, 0, { } }
+#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \
+    { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
+#define DEF_AGET(bb, opcode, reg, obj, idx) \
+    { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
+#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \
+    { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
+#define DEF_APUT(bb, opcode, reg, obj, idx) \
+    { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
+#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \
+    { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
+#define DEF_INVOKE1(bb, opcode, reg) \
+    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
+#define DEF_UNIQUE_REF(bb, opcode, reg) \
+    { bb, opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
+#define DEF_IFZ(bb, opcode, reg) \
+    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
+#define DEF_MOVE(bb, opcode, reg, src) \
+    { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } }
+#define DEF_PHI2(bb, reg, src1, src2) \
+    { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
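+
+// Each DEF_* macro above expands to a MIRDef aggregate initializer in declaration order:
+// { bbid, opcode, value, field_info, num_uses, { uses }, num_defs, { defs } }.
+// For example, DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u) describes an IGET in block 3 that
+// uses SSA reg 0 as the object, defines SSA reg 1, and refers to ifield_lowering_infos_[0].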
+
+  void DoPrepareIFields(const IFieldDef* defs, size_t count) {
+    cu_.mir_graph->ifield_lowering_infos_.Reset();
+    cu_.mir_graph->ifield_lowering_infos_.Resize(count);
+    for (size_t i = 0u; i != count; ++i) {
+      const IFieldDef* def = &defs[i];
+      MirIFieldLoweringInfo field_info(def->field_idx);
+      if (def->declaring_dex_file != 0u) {
+        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
+        field_info.declaring_field_idx_ = def->declaring_field_idx;
+        field_info.flags_ = 0u |  // Without kFlagIsStatic.
+            (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u);
+      }
+      cu_.mir_graph->ifield_lowering_infos_.Insert(field_info);
+    }
+  }
+
+  template <size_t count>
+  void PrepareIFields(const IFieldDef (&defs)[count]) {
+    DoPrepareIFields(defs, count);
+  }
+
+  void DoPrepareSFields(const SFieldDef* defs, size_t count) {
+    cu_.mir_graph->sfield_lowering_infos_.Reset();
+    cu_.mir_graph->sfield_lowering_infos_.Resize(count);
+    for (size_t i = 0u; i != count; ++i) {
+      const SFieldDef* def = &defs[i];
+      MirSFieldLoweringInfo field_info(def->field_idx);
+      // Mark even unresolved fields as initialized.
+      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
+          MirSFieldLoweringInfo::kFlagIsInitialized;
+      if (def->declaring_dex_file != 0u) {
+        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
+        field_info.declaring_field_idx_ = def->declaring_field_idx;
+        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+      }
+      cu_.mir_graph->sfield_lowering_infos_.Insert(field_info);
+    }
+  }
+
+  template <size_t count>
+  void PrepareSFields(const SFieldDef (&defs)[count]) {
+    DoPrepareSFields(defs, count);
+  }
+
+  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
+    cu_.mir_graph->block_id_map_.clear();
+    cu_.mir_graph->block_list_.Reset();
+    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
+    ASSERT_EQ(kNullBlock, defs[0].type);
+    ASSERT_EQ(kEntryBlock, defs[1].type);
+    ASSERT_EQ(kExitBlock, defs[2].type);
+    for (size_t i = 0u; i != count; ++i) {
+      const BBDef* def = &defs[i];
+      BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i);
+      cu_.mir_graph->block_list_.Insert(bb);
+      if (def->num_successors <= 2) {
+        bb->successor_block_list_type = kNotUsed;
+        bb->successor_blocks = nullptr;
+        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
+        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
+      } else {
+        bb->successor_block_list_type = kPackedSwitch;
+        bb->fall_through = 0u;
+        bb->taken = 0u;
+        bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+            &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks);
+        for (size_t j = 0u; j != def->num_successors; ++j) {
+          SuccessorBlockInfo* successor_block_info =
+              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
+                                                               kArenaAllocSuccessor));
+          successor_block_info->block = j;
+          successor_block_info->key = 0u;  // Key is not used by the GVN.
+          bb->successor_blocks->Insert(successor_block_info);
+        }
+      }
+      bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>(
+          &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors);
+      for (size_t j = 0u; j != def->num_predecessors; ++j) {
+        ASSERT_NE(0u, def->predecessors[j]);
+        bb->predecessors->Insert(def->predecessors[j]);
+      }
+      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
+        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
+            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
+      }
+    }
+    cu_.mir_graph->num_blocks_ = count;
+    ASSERT_EQ(count, cu_.mir_graph->block_list_.Size());
+    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1);
+    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
+    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2);
+    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
+  }
+
+  template <size_t count>
+  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
+    DoPrepareBasicBlocks(defs, count);
+  }
+
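+  // Builds the MIRs from a MIRDef table and appends each one to the block given by its bbid,
+  // wiring up the field lowering info, Phi inputs and SSA uses/defs from the table.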
+  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
+    mir_count_ = count;
+    mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
+    ssa_reps_.resize(count);
+    for (size_t i = 0u; i != count; ++i) {
+      const MIRDef* def = &defs[i];
+      MIR* mir = &mirs_[i];
+      ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.Size());
+      BasicBlock* bb = cu_.mir_graph->block_list_.Get(def->bbid);
+      bb->AppendMIR(mir);
+      mir->dalvikInsn.opcode = def->opcode;
+      mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
+      mir->dalvikInsn.vB_wide = def->value;
+      if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+        ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.Size());
+        mir->meta.ifield_lowering_info = def->field_info;
+      } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
+        ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.Size());
+        mir->meta.sfield_lowering_info = def->field_info;
+      } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
+        mir->meta.phi_incoming = static_cast<BasicBlockId*>(
+            allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo));
+        for (size_t j = 0u; j != def->num_uses; ++j) {
+          mir->meta.phi_incoming[j] = bb->predecessors->Get(j);
+        }
+      }
+      mir->ssa_rep = &ssa_reps_[i];
+      mir->ssa_rep->num_uses = def->num_uses;
+      mir->ssa_rep->uses = const_cast<int32_t*>(def->uses);  // Not modified by LVN.
+      mir->ssa_rep->fp_use = nullptr;  // Not used by LVN.
+      mir->ssa_rep->num_defs = def->num_defs;
+      mir->ssa_rep->defs = const_cast<int32_t*>(def->defs);  // Not modified by LVN.
+      mir->ssa_rep->fp_def = nullptr;  // Not used by LVN.
+      mir->offset = i;  // LVN uses the offset only for debug output.
+      mir->optimization_flags = 0u;
+    }
+    mirs_[count - 1u].next = nullptr;
+  }
+
+  template <size_t count>
+  void PrepareMIRs(const MIRDef (&defs)[count]) {
+    DoPrepareMIRs(defs, count);
+  }
+
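+  // Runs GVN after the full set of pre-passes (DFS orders, dominators, topological sort order);
+  // PerformPreOrderDfsGVN() below computes only the DFS orders.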
+  void PerformGVN() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
+    DoPerformGVN<RepeatingTopologicalSortIterator>();
+  }
+
+  void PerformPreOrderDfsGVN() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->SSATransformationEnd();
+    DoPerformGVN<RepeatingPreOrderDfsIterator>();
+  }
+
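+  // Feeds every block to the GVN and records the value name of each MIR. The 'change' flag
+  // returned by FinishBasicBlock() drives the repeating iterator until no block changes.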
+  template <typename IteratorType>
+  void DoPerformGVN() {
+    ASSERT_TRUE(gvn_ == nullptr);
+    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get()));
+    ASSERT_FALSE(gvn_->CanModify());
+    value_names_.resize(mir_count_, 0xffffu);
+    IteratorType iterator(cu_.mir_graph.get());
+    bool change = false;
+    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
+      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
+      if (lvn != nullptr) {
+        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+          value_names_[mir - mirs_] = lvn->GetValueNumber(mir);
+        }
+      }
+      change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
+      ASSERT_TRUE(gvn_->Good());
+    }
+  }
+
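+  // Re-runs the GVN with modifications allowed and checks that the value names stay the same
+  // and that no block reports a further change.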
+  void PerformGVNCodeModifications() {
+    ASSERT_TRUE(gvn_ != nullptr);
+    ASSERT_TRUE(gvn_->Good());
+    ASSERT_FALSE(gvn_->CanModify());
+    gvn_->AllowModifications();
+    PreOrderDfsIterator iterator(cu_.mir_graph.get());
+    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
+      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
+      if (lvn != nullptr) {
+        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+          uint16_t value_name = lvn->GetValueNumber(mir);
+          ASSERT_EQ(value_name, value_names_[mir - mirs_]);
+        }
+      }
+      bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
+      ASSERT_FALSE(change);
+      ASSERT_TRUE(gvn_->Good());
+    }
+  }
+
+  GlobalValueNumberingTest()
+      : pool_(),
+        cu_(&pool_),
+        mir_count_(0u),
+        mirs_(nullptr),
+        ssa_reps_(),
+        allocator_(),
+        gvn_(),
+        value_names_() {
+    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
+    cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
+    allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
+  }
+
+  ArenaPool pool_;
+  CompilationUnit cu_;
+  size_t mir_count_;
+  MIR* mirs_;
+  std::vector<SSARepresentation> ssa_reps_;
+  std::unique_ptr<ScopedArenaAllocator> allocator_;
+  std::unique_ptr<GlobalValueNumbering> gvn_;
+  std::vector<uint16_t> value_names_;
+};
+
+class GlobalValueNumberingTestDiamond : public GlobalValueNumberingTest {
+ public:
+  GlobalValueNumberingTestDiamond();
+
+ private:
+  static const BBDef kDiamondBbs[];
+};
+
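+// Diamond-shaped CFG: block 3 branches to blocks 4 and 5, which merge in block 6.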
+const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestDiamond::kDiamondBbs[] = {
+    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Block #3, top of the diamond.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #4, left side.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #5, right side.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // Block #6, bottom.
+};
+
+GlobalValueNumberingTestDiamond::GlobalValueNumberingTestDiamond()
+    : GlobalValueNumberingTest() {
+  PrepareBasicBlocks(kDiamondBbs);
+}
+
+class GlobalValueNumberingTestLoop : public GlobalValueNumberingTest {
+ public:
+  GlobalValueNumberingTestLoop();
+
+ private:
+  static const BBDef kLoopBbs[];
+};
+
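+// Simple loop: block 4 loops back to itself ("taken") and exits to block 5.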
+const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestLoop::kLoopBbs[] = {
+    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+};
+
+GlobalValueNumberingTestLoop::GlobalValueNumberingTestLoop()
+    : GlobalValueNumberingTest() {
+  PrepareBasicBlocks(kLoopBbs);
+}
+
+class GlobalValueNumberingTestCatch : public GlobalValueNumberingTest {
+ public:
+  GlobalValueNumberingTestCatch();
+
+ private:
+  static const BBDef kCatchBbs[];
+};
+
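+// Catch CFG: block 4 is the throwing block, block 5 the catch handler; both merge in block 6.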
+const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestCatch::kCatchBbs[] = {
+    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),     // The top.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // The throwing insn.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Catch handler.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // The merged block.
+};
+
+GlobalValueNumberingTestCatch::GlobalValueNumberingTestCatch()
+    : GlobalValueNumberingTest() {
+  PrepareBasicBlocks(kCatchBbs);
+  // Mark catch handler.
+  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
+  catch_handler->catch_entry = true;
+  // Add successor block info to the check block.
+  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
+  check_bb->successor_block_list_type = kCatch;
+  check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+      &cu_.arena, 2, kGrowableArraySuccessorBlocks);
+  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+  successor_block_info->block = catch_handler->id;
+  check_bb->successor_blocks->Insert(successor_block_info);
+}
+
+class GlobalValueNumberingTestTwoConsecutiveLoops : public GlobalValueNumberingTest {
+ public:
+  GlobalValueNumberingTestTwoConsecutiveLoops();
+
+ private:
+  static const BBDef kTwoConsecutiveLoopsBbs[];
+};
+
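+// Two consecutive loops: blocks 4/5 form the first loop and blocks 7/8 the second,
+// with block 6 between them and block 9 leading to the exit.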
+const GlobalValueNumberingTest::BBDef
+GlobalValueNumberingTestTwoConsecutiveLoops::kTwoConsecutiveLoopsBbs[] = {
+    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)),  // "taken" skips over the loop.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(4)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(6, 8)),  // "taken" skips over the loop.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(7)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(7)),
+};
+
+GlobalValueNumberingTestTwoConsecutiveLoops::GlobalValueNumberingTestTwoConsecutiveLoops()
+    : GlobalValueNumberingTest() {
+  PrepareBasicBlocks(kTwoConsecutiveLoopsBbs);
+}
+
+class GlobalValueNumberingTestTwoNestedLoops : public GlobalValueNumberingTest {
+ public:
+  GlobalValueNumberingTestTwoNestedLoops();
+
+ private:
+  static const BBDef kTwoNestedLoopsBbs[];
+};
+
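+// Two nested loops: the outer loop is 4 -> 5 -> 7 -> 4, the inner loop is 5 -> 6 -> 5,
+// and block 8 leads to the exit.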
+const GlobalValueNumberingTest::BBDef
+GlobalValueNumberingTestTwoNestedLoops::kTwoNestedLoopsBbs[] = {
+    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(8)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED2(3, 7)),  // "taken" skips over the loop.
+    DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),  // "taken" skips over the loop.
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(5)),
+    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+};
+
+GlobalValueNumberingTestTwoNestedLoops::GlobalValueNumberingTestTwoNestedLoops()
+    : GlobalValueNumberingTest() {
+  PrepareBasicBlocks(kTwoNestedLoopsBbs);
+}
+
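+// NOTE: In the MIR tables below, the destination sreg of a non-reference DEF_* usually matches
+// the entry's index in the table, so the value_names_[i] checks line up with the MIR comments.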
+TEST_F(GlobalValueNumberingTestDiamond, NonAliasingIFields) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Short.
+      { 5u, 1u, 5u, false },  // Char.
+      { 6u, 0u, 0u, false },  // Unresolved, Short.
+      { 7u, 1u, 7u, false },  // Int.
+      { 8u, 0u, 0u, false },  // Unresolved, Int.
+      { 9u, 1u, 9u, false },  // Int.
+      { 10u, 1u, 10u, false },  // Int.
+      { 11u, 1u, 11u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
+      DEF_IGET(3, Instruction::IGET, 1u, 100u, 0u),
+      DEF_IGET(6, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
+      DEF_IGET(4, Instruction::IGET, 4u, 200u, 1u),
+      DEF_IGET(6, Instruction::IGET, 5u, 200u, 1u),   // Same as at the left side.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 300u),
+      DEF_IGET(3, Instruction::IGET, 7u, 300u, 2u),
+      DEF_CONST(5, Instruction::CONST, 8u, 1000),
+      DEF_IPUT(5, Instruction::IPUT, 8u, 300u, 2u),
+      DEF_IGET(6, Instruction::IGET, 10u, 300u, 2u),  // Differs from the top and the CONST.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 400u),
+      DEF_IGET(3, Instruction::IGET, 12u, 400u, 3u),
+      DEF_CONST(3, Instruction::CONST, 13u, 2000),
+      DEF_IPUT(4, Instruction::IPUT, 13u, 400u, 3u),
+      DEF_IPUT(5, Instruction::IPUT, 13u, 400u, 3u),
+      DEF_IGET(6, Instruction::IGET, 16u, 400u, 3u),  // Differs from the top, equals the CONST.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 500u),
+      DEF_IGET(3, Instruction::IGET_SHORT, 18u, 500u, 4u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 19u, 500u, 5u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 20u, 500u, 6u),  // Clobbers field #4, not #5.
+      DEF_IGET(6, Instruction::IGET_SHORT, 21u, 500u, 4u),  // Differs from the top.
+      DEF_IGET(6, Instruction::IGET_CHAR, 22u, 500u, 5u),   // Same as the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 600u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 601u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 602u),
+      DEF_IGET(3, Instruction::IGET, 26u, 600u, 7u),
+      DEF_IGET(3, Instruction::IGET, 27u, 601u, 7u),
+      DEF_IPUT(4, Instruction::IPUT, 28u, 602u, 8u),  // Doesn't clobber field #7 for other refs.
+      DEF_IGET(6, Instruction::IGET, 29u, 600u, 7u),  // Same as the top.
+      DEF_IGET(6, Instruction::IGET, 30u, 601u, 7u),  // Same as the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 700u),
+      DEF_CONST(4, Instruction::CONST, 32u, 3000),
+      DEF_IPUT(4, Instruction::IPUT, 32u, 700u, 9u),
+      DEF_IPUT(4, Instruction::IPUT, 32u, 700u, 10u),
+      DEF_CONST(5, Instruction::CONST, 35u, 3001),
+      DEF_IPUT(5, Instruction::IPUT, 35u, 700u, 9u),
+      DEF_IPUT(5, Instruction::IPUT, 35u, 700u, 10u),
+      DEF_IGET(6, Instruction::IGET, 38u, 700u, 9u),
+      DEF_IGET(6, Instruction::IGET, 39u, 700u, 10u),  // Same value as read from field #9.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 800u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 801u),
+      DEF_CONST(4, Instruction::CONST, 42u, 3000),
+      DEF_IPUT(4, Instruction::IPUT, 42u, 800u, 11u),
+      DEF_IPUT(4, Instruction::IPUT, 42u, 801u, 11u),
+      DEF_CONST(5, Instruction::CONST, 45u, 3001),
+      DEF_IPUT(5, Instruction::IPUT, 45u, 800u, 11u),
+      DEF_IPUT(5, Instruction::IPUT, 45u, 801u, 11u),
+      DEF_IGET(6, Instruction::IGET, 48u, 800u, 11u),
+      DEF_IGET(6, Instruction::IGET, 49u, 801u, 11u),  // Same value as read from ref 800u.
+
+      // Invoke doesn't interfere with non-aliasing refs. There's one test above where a reference
+      // escapes in the left BB (we let a reference escape if we use it to store to an unresolved
+      // field) and the INVOKE in the right BB shouldn't interfere with that either.
+      DEF_INVOKE1(5, Instruction::INVOKE_STATIC, 48u),
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[1], value_names_[2]);
+
+  EXPECT_EQ(value_names_[4], value_names_[5]);
+
+  EXPECT_NE(value_names_[7], value_names_[10]);
+  EXPECT_NE(value_names_[8], value_names_[10]);
+
+  EXPECT_NE(value_names_[12], value_names_[16]);
+  EXPECT_EQ(value_names_[13], value_names_[16]);
+
+  EXPECT_NE(value_names_[18], value_names_[21]);
+  EXPECT_EQ(value_names_[19], value_names_[22]);
+
+  EXPECT_EQ(value_names_[26], value_names_[29]);
+  EXPECT_EQ(value_names_[27], value_names_[30]);
+
+  EXPECT_EQ(value_names_[38], value_names_[39]);
+
+  EXPECT_EQ(value_names_[48], value_names_[49]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsSingleObject) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Short.
+      { 5u, 1u, 5u, false },  // Char.
+      { 6u, 0u, 0u, false },  // Unresolved, Short.
+      { 7u, 1u, 7u, false },  // Int.
+      { 8u, 1u, 8u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
+      DEF_IGET(6, Instruction::IGET, 1u, 100u, 0u),   // Same as at the top.
+
+      DEF_IGET(4, Instruction::IGET, 2u, 100u, 1u),
+      DEF_IGET(6, Instruction::IGET, 3u, 100u, 1u),   // Same as at the left side.
+
+      DEF_IGET(3, Instruction::IGET, 4u, 100u, 2u),
+      DEF_CONST(5, Instruction::CONST, 5u, 1000),
+      DEF_IPUT(5, Instruction::IPUT, 5u, 100u, 2u),
+      DEF_IGET(6, Instruction::IGET, 7u, 100u, 2u),   // Differs from the top and the CONST.
+
+      DEF_IGET(3, Instruction::IGET, 8u, 100u, 3u),
+      DEF_CONST(3, Instruction::CONST, 9u, 2000),
+      DEF_IPUT(4, Instruction::IPUT, 9u, 100u, 3u),
+      DEF_IPUT(5, Instruction::IPUT, 9u, 100u, 3u),
+      DEF_IGET(6, Instruction::IGET, 12u, 100u, 3u),  // Differs from the top, equals the CONST.
+
+      DEF_IGET(3, Instruction::IGET_SHORT, 13u, 100u, 4u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 14u, 100u, 5u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 15u, 100u, 6u),  // Clobbers field #4, not #5.
+      DEF_IGET(6, Instruction::IGET_SHORT, 16u, 100u, 4u),  // Differs from the top.
+      DEF_IGET(6, Instruction::IGET_CHAR, 17u, 100u, 5u),   // Same as the top.
+
+      DEF_CONST(4, Instruction::CONST, 18u, 3000),
+      DEF_IPUT(4, Instruction::IPUT, 18u, 100u, 7u),
+      DEF_IPUT(4, Instruction::IPUT, 18u, 100u, 8u),
+      DEF_CONST(5, Instruction::CONST, 21u, 3001),
+      DEF_IPUT(5, Instruction::IPUT, 21u, 100u, 7u),
+      DEF_IPUT(5, Instruction::IPUT, 21u, 100u, 8u),
+      DEF_IGET(6, Instruction::IGET, 24u, 100u, 7u),
+      DEF_IGET(6, Instruction::IGET, 25u, 100u, 8u),  // Same value as read from field #7.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+
+  EXPECT_EQ(value_names_[2], value_names_[3]);
+
+  EXPECT_NE(value_names_[4], value_names_[7]);
+  EXPECT_NE(value_names_[5], value_names_[7]);
+
+  EXPECT_NE(value_names_[8], value_names_[12]);
+  EXPECT_EQ(value_names_[9], value_names_[12]);
+
+  EXPECT_NE(value_names_[13], value_names_[16]);
+  EXPECT_EQ(value_names_[14], value_names_[17]);
+
+  EXPECT_EQ(value_names_[24], value_names_[25]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsTwoObjects) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Short.
+      { 5u, 1u, 5u, false },  // Char.
+      { 6u, 0u, 0u, false },  // Unresolved, Short.
+      { 7u, 1u, 7u, false },  // Int.
+      { 8u, 1u, 8u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
+      DEF_IPUT(4, Instruction::IPUT, 1u, 101u, 0u),   // May alias with the IGET at the top.
+      DEF_IGET(6, Instruction::IGET, 2u, 100u, 0u),   // Differs from the top.
+
+      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
+      DEF_IPUT(5, Instruction::IPUT, 3u, 101u, 1u),   // If aliasing, stores the same value.
+      DEF_IGET(6, Instruction::IGET, 5u, 100u, 1u),   // Same as the top.
+
+      DEF_IGET(3, Instruction::IGET, 6u, 100u, 2u),
+      DEF_CONST(5, Instruction::CONST, 7u, 1000),
+      DEF_IPUT(5, Instruction::IPUT, 7u, 101u, 2u),
+      DEF_IGET(6, Instruction::IGET, 9u, 100u, 2u),   // Differs from the top and the CONST.
+
+      DEF_IGET(3, Instruction::IGET, 10u, 100u, 3u),
+      DEF_CONST(3, Instruction::CONST, 11u, 2000),
+      DEF_IPUT(4, Instruction::IPUT, 11u, 101u, 3u),
+      DEF_IPUT(5, Instruction::IPUT, 11u, 101u, 3u),
+      DEF_IGET(6, Instruction::IGET, 14u, 100u, 3u),  // Differs from the top and the CONST.
+
+      DEF_IGET(3, Instruction::IGET_SHORT, 15u, 100u, 4u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 16u, 100u, 5u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 17u, 101u, 6u),  // Clobbers field #4, not #5.
+      DEF_IGET(6, Instruction::IGET_SHORT, 18u, 100u, 4u),  // Differs from the top.
+      DEF_IGET(6, Instruction::IGET_CHAR, 19u, 100u, 5u),   // Same as the top.
+
+      DEF_CONST(4, Instruction::CONST, 20u, 3000),
+      DEF_IPUT(4, Instruction::IPUT, 20u, 100u, 7u),
+      DEF_IPUT(4, Instruction::IPUT, 20u, 101u, 8u),
+      DEF_CONST(5, Instruction::CONST, 23u, 3001),
+      DEF_IPUT(5, Instruction::IPUT, 23u, 100u, 7u),
+      DEF_IPUT(5, Instruction::IPUT, 23u, 101u, 8u),
+      DEF_IGET(6, Instruction::IGET, 26u, 100u, 7u),
+      DEF_IGET(6, Instruction::IGET, 27u, 101u, 8u),  // Same value as read from field #7.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[2]);
+
+  EXPECT_EQ(value_names_[3], value_names_[5]);
+
+  EXPECT_NE(value_names_[6], value_names_[9]);
+  EXPECT_NE(value_names_[7], value_names_[9]);
+
+  EXPECT_NE(value_names_[10], value_names_[14]);
+  EXPECT_NE(value_names_[11], value_names_[14]);
+
+  EXPECT_NE(value_names_[15], value_names_[18]);
+  EXPECT_EQ(value_names_[16], value_names_[19]);
+
+  EXPECT_EQ(value_names_[26], value_names_[27]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, SFields) {
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Short.
+      { 5u, 1u, 5u, false },  // Char.
+      { 6u, 0u, 0u, false },  // Unresolved, Short.
+      { 7u, 1u, 7u, false },  // Int.
+      { 8u, 1u, 8u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_SGET(3, Instruction::SGET, 0u, 0u),
+      DEF_SGET(6, Instruction::SGET, 1u, 0u),         // Same as at the top.
+
+      DEF_SGET(4, Instruction::SGET, 2u, 1u),
+      DEF_SGET(6, Instruction::SGET, 3u, 1u),         // Same as at the left side.
+
+      DEF_SGET(3, Instruction::SGET, 4u, 2u),
+      DEF_CONST(5, Instruction::CONST, 5u, 100),
+      DEF_SPUT(5, Instruction::SPUT, 5u, 2u),
+      DEF_SGET(6, Instruction::SGET, 7u, 2u),         // Differs from the top and the CONST.
+
+      DEF_SGET(3, Instruction::SGET, 8u, 3u),
+      DEF_CONST(3, Instruction::CONST, 9u, 200),
+      DEF_SPUT(4, Instruction::SPUT, 9u, 3u),
+      DEF_SPUT(5, Instruction::SPUT, 9u, 3u),
+      DEF_SGET(6, Instruction::SGET, 12u, 3u),        // Differs from the top, equals the CONST.
+
+      DEF_SGET(3, Instruction::SGET_SHORT, 13u, 4u),
+      DEF_SGET(3, Instruction::SGET_CHAR, 14u, 5u),
+      DEF_SPUT(4, Instruction::SPUT_SHORT, 15u, 6u),  // Clobbers field #4, not #5.
+      DEF_SGET(6, Instruction::SGET_SHORT, 16u, 4u),  // Differs from the top.
+      DEF_SGET(6, Instruction::SGET_CHAR, 17u, 5u),   // Same as the top.
+
+      DEF_CONST(4, Instruction::CONST, 18u, 300),
+      DEF_SPUT(4, Instruction::SPUT, 18u, 7u),
+      DEF_SPUT(4, Instruction::SPUT, 18u, 8u),
+      DEF_CONST(5, Instruction::CONST, 21u, 301),
+      DEF_SPUT(5, Instruction::SPUT, 21u, 7u),
+      DEF_SPUT(5, Instruction::SPUT, 21u, 8u),
+      DEF_SGET(6, Instruction::SGET, 24u, 7u),
+      DEF_SGET(6, Instruction::SGET, 25u, 8u),        // Same value as read from field #7.
+  };
+
+  PrepareSFields(sfields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+
+  EXPECT_EQ(value_names_[2], value_names_[3]);
+
+  EXPECT_NE(value_names_[4], value_names_[7]);
+  EXPECT_NE(value_names_[5], value_names_[7]);
+
+  EXPECT_NE(value_names_[8], value_names_[12]);
+  EXPECT_EQ(value_names_[9], value_names_[12]);
+
+  EXPECT_NE(value_names_[13], value_names_[16]);
+  EXPECT_EQ(value_names_[14], value_names_[17]);
+
+  EXPECT_EQ(value_names_[24], value_names_[25]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, NonAliasingArrays) {
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 100u),
+      DEF_AGET(3, Instruction::AGET, 1u, 100u, 101u),
+      DEF_AGET(6, Instruction::AGET, 2u, 100u, 101u),   // Same as at the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
+      DEF_AGET(4, Instruction::AGET, 4u, 200u, 201u),
+      DEF_AGET(6, Instruction::AGET, 5u, 200u, 201u),   // Same as at the left side.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 300u),
+      DEF_AGET(3, Instruction::AGET, 7u, 300u, 301u),
+      DEF_CONST(5, Instruction::CONST, 8u, 1000),
+      DEF_APUT(5, Instruction::APUT, 8u, 300u, 301u),
+      DEF_AGET(6, Instruction::AGET, 10u, 300u, 301u),  // Differs from the top and the CONST.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 400u),
+      DEF_AGET(3, Instruction::AGET, 12u, 400u, 401u),
+      DEF_CONST(3, Instruction::CONST, 13u, 2000),
+      DEF_APUT(4, Instruction::APUT, 13u, 400u, 401u),
+      DEF_APUT(5, Instruction::APUT, 13u, 400u, 401u),
+      DEF_AGET(6, Instruction::AGET, 16u, 400u, 401u),  // Differs from the top, equals the CONST.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 500u),
+      DEF_AGET(3, Instruction::AGET, 18u, 500u, 501u),
+      DEF_APUT(4, Instruction::APUT, 19u, 500u, 502u),  // Clobbers value at index 501u.
+      DEF_AGET(6, Instruction::AGET, 20u, 500u, 501u),  // Differs from the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 600u),
+      DEF_CONST(4, Instruction::CONST, 22u, 3000),
+      DEF_APUT(4, Instruction::APUT, 22u, 600u, 601u),
+      DEF_APUT(4, Instruction::APUT, 22u, 600u, 602u),
+      DEF_CONST(5, Instruction::CONST, 25u, 3001),
+      DEF_APUT(5, Instruction::APUT, 25u, 600u, 601u),
+      DEF_APUT(5, Instruction::APUT, 25u, 600u, 602u),
+      DEF_AGET(6, Instruction::AGET, 28u, 600u, 601u),
+      DEF_AGET(6, Instruction::AGET, 29u, 600u, 602u),  // Same value as read from index 601u.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 700u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 701u),
+      DEF_AGET(3, Instruction::AGET, 32u, 700u, 702u),
+      DEF_APUT(4, Instruction::APUT, 33u, 701u, 702u),  // Doesn't interfere with unrelated array.
+      DEF_AGET(6, Instruction::AGET, 34u, 700u, 702u),  // Same value as at the top.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[1], value_names_[2]);
+
+  EXPECT_EQ(value_names_[4], value_names_[5]);
+
+  EXPECT_NE(value_names_[7], value_names_[10]);
+  EXPECT_NE(value_names_[8], value_names_[10]);
+
+  EXPECT_NE(value_names_[12], value_names_[16]);
+  EXPECT_EQ(value_names_[13], value_names_[16]);
+
+  EXPECT_NE(value_names_[18], value_names_[20]);
+
+  EXPECT_NE(value_names_[28], value_names_[22]);
+  EXPECT_NE(value_names_[28], value_names_[25]);
+  EXPECT_EQ(value_names_[28], value_names_[29]);
+
+  EXPECT_EQ(value_names_[32], value_names_[34]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, AliasingArrays) {
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      // NOTE: We're also testing that these tests really do not interfere with each other.
+
+      DEF_AGET(3, Instruction::AGET_BOOLEAN, 0u, 100u, 101u),
+      DEF_AGET(6, Instruction::AGET_BOOLEAN, 1u, 100u, 101u),  // Same as at the top.
+
+      DEF_AGET(4, Instruction::AGET_OBJECT, 2u, 200u, 201u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 3u, 200u, 201u),  // Same as at the left side.
+
+      DEF_AGET(3, Instruction::AGET_WIDE, 4u, 300u, 301u),
+      DEF_CONST(5, Instruction::CONST_WIDE, 5u, 1000),
+      DEF_APUT(5, Instruction::APUT_WIDE, 5u, 300u, 301u),
+      DEF_AGET(6, Instruction::AGET_WIDE, 7u, 300u, 301u),  // Differs from the top and the CONST.
+
+      DEF_AGET(3, Instruction::AGET_SHORT, 8u, 400u, 401u),
+      DEF_CONST(3, Instruction::CONST, 9u, 2000),
+      DEF_APUT(4, Instruction::APUT_SHORT, 9u, 400u, 401u),
+      DEF_APUT(5, Instruction::APUT_SHORT, 9u, 400u, 401u),
+      DEF_AGET(6, Instruction::AGET_SHORT, 12u, 400u, 401u),  // Differs from the top, == CONST.
+
+      DEF_AGET(3, Instruction::AGET_CHAR, 13u, 500u, 501u),
+      DEF_APUT(4, Instruction::APUT_CHAR, 14u, 500u, 502u),  // Clobbers value at index 501u.
+      DEF_AGET(6, Instruction::AGET_CHAR, 15u, 500u, 501u),  // Differs from the top.
+
+      DEF_AGET(3, Instruction::AGET_BYTE, 16u, 600u, 602u),
+      DEF_APUT(4, Instruction::APUT_BYTE, 17u, 601u, 602u),  // Clobbers values in array 600u.
+      DEF_AGET(6, Instruction::AGET_BYTE, 18u, 600u, 602u),  // Differs from the top.
+
+      DEF_CONST(4, Instruction::CONST, 19u, 3000),
+      DEF_APUT(4, Instruction::APUT, 19u, 700u, 701u),
+      DEF_APUT(4, Instruction::APUT, 19u, 700u, 702u),
+      DEF_CONST(5, Instruction::CONST, 22u, 3001),
+      DEF_APUT(5, Instruction::APUT, 22u, 700u, 701u),
+      DEF_APUT(5, Instruction::APUT, 22u, 700u, 702u),
+      DEF_AGET(6, Instruction::AGET, 25u, 700u, 701u),
+      DEF_AGET(6, Instruction::AGET, 26u, 700u, 702u),  // Same value as read from index 701u.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+
+  EXPECT_EQ(value_names_[2], value_names_[3]);
+
+  EXPECT_NE(value_names_[4], value_names_[7]);
+  EXPECT_NE(value_names_[5], value_names_[7]);
+
+  EXPECT_NE(value_names_[8], value_names_[12]);
+  EXPECT_EQ(value_names_[9], value_names_[12]);
+
+  EXPECT_NE(value_names_[13], value_names_[15]);
+
+  EXPECT_NE(value_names_[16], value_names_[18]);
+
+  EXPECT_NE(value_names_[25], value_names_[19]);
+  EXPECT_NE(value_names_[25], value_names_[22]);
+  EXPECT_EQ(value_names_[25], value_names_[26]);
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, Phi) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_CONST(4, Instruction::CONST, 1u, 2000),
+      DEF_CONST(5, Instruction::CONST, 2u, 3000),
+      DEF_MOVE(4, Instruction::MOVE, 3u, 0u),
+      DEF_MOVE(4, Instruction::MOVE, 4u, 1u),
+      DEF_MOVE(5, Instruction::MOVE, 5u, 0u),
+      DEF_MOVE(5, Instruction::MOVE, 6u, 2u),
+      DEF_PHI2(6, 7u, 3u, 5u),    // Same as CONST 0u (1000).
+      DEF_PHI2(6, 8u, 3u, 0u),    // Same as CONST 0u (1000).
+      DEF_PHI2(6, 9u, 0u, 5u),    // Same as CONST 0u (1000).
+      DEF_PHI2(6, 10u, 4u, 5u),   // Merge 1u (2000) and 0u (1000).
+      DEF_PHI2(6, 11u, 1u, 5u),   // Merge 1u (2000) and 0u (1000).
+      DEF_PHI2(6, 12u, 4u, 0u),   // Merge 1u (2000) and 0u (1000).
+      DEF_PHI2(6, 13u, 1u, 0u),   // Merge 1u (2000) and 0u (1000).
+      DEF_PHI2(6, 14u, 3u, 6u),   // Merge 0u (1000) and 2u (3000).
+      DEF_PHI2(6, 15u, 0u, 6u),   // Merge 0u (1000) and 2u (3000).
+      DEF_PHI2(6, 16u, 3u, 2u),   // Merge 0u (1000) and 2u (3000).
+      DEF_PHI2(6, 17u, 0u, 2u),   // Merge 0u (1000) and 2u (3000).
+      DEF_PHI2(6, 18u, 4u, 6u),   // Merge 1u (2000) and 2u (3000).
+      DEF_PHI2(6, 19u, 1u, 6u),   // Merge 1u (2000) and 2u (3000).
+      DEF_PHI2(6, 20u, 4u, 2u),   // Merge 1u (2000) and 2u (3000).
+      DEF_PHI2(6, 21u, 1u, 2u),   // Merge 1u (2000) and 2u (3000).
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[7]);
+  EXPECT_EQ(value_names_[0], value_names_[8]);
+  EXPECT_EQ(value_names_[0], value_names_[9]);
+  EXPECT_NE(value_names_[10], value_names_[0]);
+  EXPECT_NE(value_names_[10], value_names_[1]);
+  EXPECT_NE(value_names_[10], value_names_[2]);
+  EXPECT_EQ(value_names_[10], value_names_[11]);
+  EXPECT_EQ(value_names_[10], value_names_[12]);
+  EXPECT_EQ(value_names_[10], value_names_[13]);
+  EXPECT_NE(value_names_[14], value_names_[0]);
+  EXPECT_NE(value_names_[14], value_names_[1]);
+  EXPECT_NE(value_names_[14], value_names_[2]);
+  EXPECT_NE(value_names_[14], value_names_[10]);
+  EXPECT_EQ(value_names_[14], value_names_[15]);
+  EXPECT_EQ(value_names_[14], value_names_[16]);
+  EXPECT_EQ(value_names_[14], value_names_[17]);
+  EXPECT_NE(value_names_[18], value_names_[0]);
+  EXPECT_NE(value_names_[18], value_names_[1]);
+  EXPECT_NE(value_names_[18], value_names_[2]);
+  EXPECT_NE(value_names_[18], value_names_[10]);
+  EXPECT_NE(value_names_[18], value_names_[14]);
+  EXPECT_EQ(value_names_[18], value_names_[19]);
+  EXPECT_EQ(value_names_[18], value_names_[20]);
+  EXPECT_EQ(value_names_[18], value_names_[21]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, NonAliasingIFields) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Int.
+      { 5u, 1u, 5u, false },  // Short.
+      { 6u, 1u, 6u, false },  // Char.
+      { 7u, 0u, 0u, false },  // Unresolved, Short.
+      { 8u, 1u, 8u, false },  // Int.
+      { 9u, 0u, 0u, false },  // Unresolved, Int.
+      { 10u, 1u, 10u, false },  // Int.
+      { 11u, 1u, 11u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
+      DEF_IGET(3, Instruction::IGET, 1u, 100u, 0u),
+      DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
+      DEF_IGET(5, Instruction::IGET, 3u, 100u, 0u),   // Same as at the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
+      DEF_IGET(3, Instruction::IGET, 5u, 200u, 1u),
+      DEF_IGET(4, Instruction::IGET, 6u, 200u, 1u),   // Differs from top...
+      DEF_IPUT(4, Instruction::IPUT, 7u, 200u, 1u),   // Because of this IPUT.
+      DEF_IGET(5, Instruction::IGET, 8u, 200u, 1u),   // Differs from top and the loop IGET.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 300u),
+      DEF_IGET(3, Instruction::IGET, 10u, 300u, 2u),
+      DEF_IPUT(4, Instruction::IPUT, 11u, 300u, 2u),  // Because of this IPUT...
+      DEF_IGET(4, Instruction::IGET, 12u, 300u, 2u),  // Differs from top.
+      DEF_IGET(5, Instruction::IGET, 13u, 300u, 2u),  // Differs from top but same as the loop IGET.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 400u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 401u),
+      DEF_CONST(3, Instruction::CONST, 16u, 3000),
+      DEF_IPUT(3, Instruction::IPUT, 16u, 400u, 3u),
+      DEF_IPUT(3, Instruction::IPUT, 16u, 400u, 4u),
+      DEF_IPUT(3, Instruction::IPUT, 16u, 401u, 3u),
+      DEF_IGET(4, Instruction::IGET, 20u, 400u, 3u),  // Differs from 16u and 23u.
+      DEF_IGET(4, Instruction::IGET, 21u, 400u, 4u),  // Same as 20u.
+      DEF_IGET(4, Instruction::IGET, 22u, 401u, 3u),  // Same as 20u.
+      DEF_CONST(4, Instruction::CONST, 23u, 4000),
+      DEF_IPUT(4, Instruction::IPUT, 23u, 400u, 3u),
+      DEF_IPUT(4, Instruction::IPUT, 23u, 400u, 4u),
+      DEF_IPUT(4, Instruction::IPUT, 23u, 401u, 3u),
+      DEF_IGET(5, Instruction::IGET, 27u, 400u, 3u),  // Differs from 16u and 20u...
+      DEF_IGET(5, Instruction::IGET, 28u, 400u, 4u),  // and same as the CONST 23u...
+      DEF_IGET(5, Instruction::IGET, 29u, 400u, 4u),  // and same as the CONST 23u.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 500u),
+      DEF_IGET(3, Instruction::IGET_SHORT, 31u, 500u, 5u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 32u, 500u, 6u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 33u, 500u, 7u),  // Clobbers field #5, not #6.
+      DEF_IGET(5, Instruction::IGET_SHORT, 34u, 500u, 5u),  // Differs from the top.
+      DEF_IGET(5, Instruction::IGET_CHAR, 35u, 500u, 6u),   // Same as the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 600u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 601u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 602u),
+      DEF_IGET(3, Instruction::IGET, 39u, 600u, 8u),
+      DEF_IGET(3, Instruction::IGET, 40u, 601u, 8u),
+      DEF_IPUT(4, Instruction::IPUT, 41u, 602u, 9u),  // Doesn't clobber field #8 for other refs.
+      DEF_IGET(5, Instruction::IGET, 42u, 600u, 8u),  // Same as the top.
+      DEF_IGET(5, Instruction::IGET, 43u, 601u, 8u),  // Same as the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 700u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 701u),
+      DEF_CONST(3, Instruction::CONST, 46u, 3000),
+      DEF_IPUT(3, Instruction::IPUT, 46u, 700u, 10u),
+      DEF_IPUT(3, Instruction::IPUT, 46u, 700u, 11u),
+      DEF_IPUT(3, Instruction::IPUT, 46u, 701u, 10u),
+      DEF_IGET(4, Instruction::IGET, 50u, 700u, 10u),  // Differs from the CONSTs 46u and 53u.
+      DEF_IGET(4, Instruction::IGET, 51u, 700u, 11u),  // Same as 50u.
+      DEF_IGET(4, Instruction::IGET, 52u, 701u, 10u),  // Same as 50u.
+      DEF_CONST(4, Instruction::CONST, 53u, 3001),
+      DEF_IPUT(4, Instruction::IPUT, 53u, 700u, 10u),
+      DEF_IPUT(4, Instruction::IPUT, 53u, 700u, 11u),
+      DEF_IPUT(4, Instruction::IPUT, 53u, 701u, 10u),
+      DEF_IGET(5, Instruction::IGET, 57u, 700u, 10u),  // Same as the CONST 53u.
+      DEF_IGET(5, Instruction::IGET, 58u, 700u, 11u),  // Same as the CONST 53u.
+      DEF_IGET(5, Instruction::IGET, 59u, 701u, 10u),  // Same as the CONST 53u.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[1], value_names_[2]);
+  EXPECT_EQ(value_names_[1], value_names_[3]);
+
+  EXPECT_NE(value_names_[5], value_names_[6]);
+  EXPECT_NE(value_names_[5], value_names_[8]);
+  EXPECT_NE(value_names_[6], value_names_[8]);
+
+  EXPECT_NE(value_names_[10], value_names_[12]);
+  EXPECT_EQ(value_names_[12], value_names_[13]);
+
+  EXPECT_NE(value_names_[20], value_names_[16]);
+  EXPECT_NE(value_names_[20], value_names_[23]);
+  EXPECT_EQ(value_names_[20], value_names_[21]);
+  EXPECT_EQ(value_names_[20], value_names_[22]);
+  EXPECT_NE(value_names_[27], value_names_[16]);
+  EXPECT_NE(value_names_[27], value_names_[20]);
+  EXPECT_EQ(value_names_[27], value_names_[28]);
+  EXPECT_EQ(value_names_[27], value_names_[29]);
+
+  EXPECT_NE(value_names_[31], value_names_[34]);
+  EXPECT_EQ(value_names_[32], value_names_[35]);
+
+  EXPECT_EQ(value_names_[39], value_names_[42]);
+  EXPECT_EQ(value_names_[40], value_names_[43]);
+
+  EXPECT_NE(value_names_[50], value_names_[46]);
+  EXPECT_NE(value_names_[50], value_names_[53]);
+  EXPECT_EQ(value_names_[50], value_names_[51]);
+  EXPECT_EQ(value_names_[50], value_names_[52]);
+  EXPECT_EQ(value_names_[57], value_names_[53]);
+  EXPECT_EQ(value_names_[58], value_names_[53]);
+  EXPECT_EQ(value_names_[59], value_names_[53]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsSingleObject) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Int.
+      { 4u, 1u, 4u, false },  // Int.
+      { 5u, 1u, 5u, false },  // Short.
+      { 6u, 1u, 6u, false },  // Char.
+      { 7u, 0u, 0u, false },  // Unresolved, Short.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
+      DEF_IGET(4, Instruction::IGET, 1u, 100u, 0u),   // Same as at the top.
+      DEF_IGET(5, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
+
+      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
+      DEF_IGET(4, Instruction::IGET, 4u, 100u, 1u),   // Differs from top...
+      DEF_IPUT(4, Instruction::IPUT, 5u, 100u, 1u),   // Because of this IPUT.
+      DEF_IGET(5, Instruction::IGET, 6u, 100u, 1u),   // Differs from top and the loop IGET.
+
+      DEF_IGET(3, Instruction::IGET, 7u, 100u, 2u),
+      DEF_IPUT(4, Instruction::IPUT, 8u, 100u, 2u),   // Because of this IPUT...
+      DEF_IGET(4, Instruction::IGET, 9u, 100u, 2u),   // Differs from top.
+      DEF_IGET(5, Instruction::IGET, 10u, 100u, 2u),  // Differs from top but same as the loop IGET.
+
+      DEF_CONST(3, Instruction::CONST, 11u, 3000),
+      DEF_IPUT(3, Instruction::IPUT, 11u, 100u, 3u),
+      DEF_IPUT(3, Instruction::IPUT, 11u, 100u, 4u),
+      DEF_IGET(4, Instruction::IGET, 14u, 100u, 3u),  // Differs from 11u and 16u.
+      DEF_IGET(4, Instruction::IGET, 15u, 100u, 4u),  // Same as 14u.
+      DEF_CONST(4, Instruction::CONST, 16u, 4000),
+      DEF_IPUT(4, Instruction::IPUT, 16u, 100u, 3u),
+      DEF_IPUT(4, Instruction::IPUT, 16u, 100u, 4u),
+      DEF_IGET(5, Instruction::IGET, 19u, 100u, 3u),  // Differs from 11u and 14u...
+      DEF_IGET(5, Instruction::IGET, 20u, 100u, 4u),  // and same as the CONST 16u.
+
+      DEF_IGET(3, Instruction::IGET_SHORT, 21u, 100u, 5u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 22u, 100u, 6u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 23u, 100u, 7u),  // Clobbers field #5, not #6.
+      DEF_IGET(5, Instruction::IGET_SHORT, 24u, 100u, 5u),  // Differs from the top.
+      DEF_IGET(5, Instruction::IGET_CHAR, 25u, 100u, 6u),   // Same as the top.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  EXPECT_NE(value_names_[3], value_names_[4]);
+  EXPECT_NE(value_names_[3], value_names_[6]);
+  EXPECT_NE(value_names_[4], value_names_[6]);
+
+  EXPECT_NE(value_names_[7], value_names_[9]);
+  EXPECT_EQ(value_names_[9], value_names_[10]);
+
+  EXPECT_NE(value_names_[14], value_names_[11]);
+  EXPECT_NE(value_names_[14], value_names_[16]);
+  EXPECT_EQ(value_names_[14], value_names_[15]);
+  EXPECT_NE(value_names_[19], value_names_[11]);
+  EXPECT_NE(value_names_[19], value_names_[14]);
+  EXPECT_EQ(value_names_[19], value_names_[16]);
+  EXPECT_EQ(value_names_[19], value_names_[20]);
+
+  EXPECT_NE(value_names_[21], value_names_[24]);
+  EXPECT_EQ(value_names_[22], value_names_[25]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsTwoObjects) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+      { 3u, 1u, 3u, false },  // Short.
+      { 4u, 1u, 4u, false },  // Char.
+      { 5u, 0u, 0u, false },  // Unresolved, Short.
+      { 6u, 1u, 6u, false },  // Int.
+      { 7u, 1u, 7u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
+      DEF_IPUT(4, Instruction::IPUT, 1u, 101u, 0u),   // May alias with the IGET at the top.
+      DEF_IGET(5, Instruction::IGET, 2u, 100u, 0u),   // Differs from the top.
+
+      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
+      DEF_IPUT(4, Instruction::IPUT, 3u, 101u, 1u),   // If aliasing, stores the same value.
+      DEF_IGET(5, Instruction::IGET, 5u, 100u, 1u),   // Same as the top.
+
+      DEF_IGET(3, Instruction::IGET, 6u, 100u, 2u),
+      DEF_CONST(4, Instruction::CONST, 7u, 1000),
+      DEF_IPUT(4, Instruction::IPUT, 7u, 101u, 2u),
+      DEF_IGET(5, Instruction::IGET, 9u, 100u, 2u),   // Differs from the top and the CONST.
+
+      DEF_IGET(3, Instruction::IGET_SHORT, 10u, 100u, 3u),
+      DEF_IGET(3, Instruction::IGET_CHAR, 11u, 100u, 4u),
+      DEF_IPUT(4, Instruction::IPUT_SHORT, 12u, 101u, 5u),  // Clobbers field #3, not #4.
+      DEF_IGET(5, Instruction::IGET_SHORT, 13u, 100u, 3u),  // Differs from the top.
+      DEF_IGET(5, Instruction::IGET_CHAR, 14u, 100u, 4u),   // Same as the top.
+
+      DEF_CONST(3, Instruction::CONST, 15u, 3000),
+      DEF_IPUT(3, Instruction::IPUT, 15u, 100u, 6u),
+      DEF_IPUT(3, Instruction::IPUT, 15u, 100u, 7u),
+      DEF_IPUT(3, Instruction::IPUT, 15u, 101u, 6u),
+      DEF_IGET(4, Instruction::IGET, 19u, 100u, 6u),  // Differs from CONSTs 15u and 22u.
+      DEF_IGET(4, Instruction::IGET, 20u, 100u, 7u),  // Same value as 19u.
+      DEF_IGET(4, Instruction::IGET, 21u, 101u, 6u),  // Same value as read from field #7.
+      DEF_CONST(4, Instruction::CONST, 22u, 3001),
+      DEF_IPUT(4, Instruction::IPUT, 22u, 100u, 6u),
+      DEF_IPUT(4, Instruction::IPUT, 22u, 100u, 7u),
+      DEF_IPUT(4, Instruction::IPUT, 22u, 101u, 6u),
+      DEF_IGET(5, Instruction::IGET, 26u, 100u, 6u),  // Same as CONST 22u.
+      DEF_IGET(5, Instruction::IGET, 27u, 100u, 7u),  // Same as CONST 22u.
+      DEF_IGET(5, Instruction::IGET, 28u, 101u, 6u),  // Same as CONST 22u.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[2]);
+
+  EXPECT_EQ(value_names_[3], value_names_[5]);
+
+  EXPECT_NE(value_names_[6], value_names_[9]);
+  EXPECT_NE(value_names_[7], value_names_[9]);
+
+  EXPECT_NE(value_names_[10], value_names_[13]);
+  EXPECT_EQ(value_names_[11], value_names_[14]);
+
+  EXPECT_NE(value_names_[19], value_names_[15]);
+  EXPECT_NE(value_names_[19], value_names_[22]);
+  EXPECT_EQ(value_names_[22], value_names_[26]);
+  EXPECT_EQ(value_names_[22], value_names_[27]);
+  EXPECT_EQ(value_names_[22], value_names_[28]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, IFieldToBaseDependency) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // For the IGET that loads sreg 3u using base 2u, the following IPUT creates a dependency
+      // from the field value to the base. However, this dependency does not result in an
+      // infinite loop since the merge of the field value for base 0u gets assigned a value name
+      // based only on the base 0u, not on the actual value, and breaks the dependency cycle.
+      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
+      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+      DEF_IGET(4, Instruction::IGET, 2u, 0u, 0u),
+      DEF_IGET(4, Instruction::IGET, 3u, 2u, 0u),
+      DEF_IPUT(4, Instruction::IPUT, 3u, 0u, 0u),
+      DEF_IGET(5, Instruction::IGET, 5u, 0u, 0u),
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[1], value_names_[2]);
+  EXPECT_EQ(value_names_[3], value_names_[5]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, SFields) {
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+      { 2u, 1u, 2u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_SGET(3, Instruction::SGET, 0u, 0u),
+      DEF_SGET(4, Instruction::SGET, 1u, 0u),         // Same as at the top.
+      DEF_SGET(5, Instruction::SGET, 2u, 0u),         // Same as at the top.
+
+      DEF_SGET(3, Instruction::SGET, 3u, 1u),
+      DEF_SGET(4, Instruction::SGET, 4u, 1u),         // Differs from top...
+      DEF_SPUT(4, Instruction::SPUT, 5u, 1u),         // Because of this SPUT.
+      DEF_SGET(5, Instruction::SGET, 6u, 1u),         // Differs from top and the loop SGET.
+
+      DEF_SGET(3, Instruction::SGET, 7u, 2u),
+      DEF_SPUT(4, Instruction::SPUT, 8u, 2u),         // Because of this SPUT...
+      DEF_SGET(4, Instruction::SGET, 9u, 2u),         // Differs from top.
+      DEF_SGET(5, Instruction::SGET, 10u, 2u),        // Differs from top but same as the loop SGET.
+  };
+
+  PrepareSFields(sfields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  EXPECT_NE(value_names_[3], value_names_[4]);
+  EXPECT_NE(value_names_[3], value_names_[6]);
+  EXPECT_NE(value_names_[4], value_names_[6]);
+
+  EXPECT_NE(value_names_[7], value_names_[9]);
+  EXPECT_EQ(value_names_[9], value_names_[10]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, NonAliasingArrays) {
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 100u),
+      DEF_AGET(3, Instruction::AGET, 1u, 100u, 101u),
+      DEF_AGET(4, Instruction::AGET, 2u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(5, Instruction::AGET, 3u, 100u, 101u),   // Same as at the top.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
+      DEF_AGET(3, Instruction::AGET, 5u, 200u, 201u),
+      DEF_AGET(4, Instruction::AGET, 6u, 200u, 201u),  // Differs from top...
+      DEF_APUT(4, Instruction::APUT, 7u, 200u, 201u),  // Because of this APUT.
+      DEF_AGET(5, Instruction::AGET, 8u, 200u, 201u),  // Differs from top and the loop AGET.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 300u),
+      DEF_AGET(3, Instruction::AGET, 10u, 300u, 301u),
+      DEF_APUT(4, Instruction::APUT, 11u, 300u, 301u),  // Because of this APUT...
+      DEF_AGET(4, Instruction::AGET, 12u, 300u, 301u),  // Differs from top.
+      DEF_AGET(5, Instruction::AGET, 13u, 300u, 301u),  // Differs from top but == the loop AGET.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 400u),
+      DEF_CONST(3, Instruction::CONST, 15u, 3000),
+      DEF_APUT(3, Instruction::APUT, 15u, 400u, 401u),
+      DEF_APUT(3, Instruction::APUT, 15u, 400u, 402u),
+      DEF_AGET(4, Instruction::AGET, 18u, 400u, 401u),  // Differs from 15u and 20u.
+      DEF_AGET(4, Instruction::AGET, 19u, 400u, 402u),  // Same as 18u.
+      DEF_CONST(4, Instruction::CONST, 20u, 4000),
+      DEF_APUT(4, Instruction::APUT, 20u, 400u, 401u),
+      DEF_APUT(4, Instruction::APUT, 20u, 400u, 402u),
+      DEF_AGET(5, Instruction::AGET, 23u, 400u, 401u),  // Differs from 15u and 18u...
+      DEF_AGET(5, Instruction::AGET, 24u, 400u, 402u),  // and same as the CONST 20u.
+
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 500u),
+      DEF_AGET(3, Instruction::AGET, 26u, 500u, 501u),
+      DEF_APUT(4, Instruction::APUT, 27u, 500u, 502u),  // Clobbers element at index 501u.
+      DEF_AGET(5, Instruction::AGET, 28u, 500u, 501u),  // Differs from the top.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[1], value_names_[2]);
+  EXPECT_EQ(value_names_[1], value_names_[3]);
+
+  EXPECT_NE(value_names_[5], value_names_[6]);
+  EXPECT_NE(value_names_[5], value_names_[8]);
+  EXPECT_NE(value_names_[6], value_names_[8]);
+
+  EXPECT_NE(value_names_[10], value_names_[12]);
+  EXPECT_EQ(value_names_[12], value_names_[13]);
+
+  EXPECT_NE(value_names_[18], value_names_[15]);
+  EXPECT_NE(value_names_[18], value_names_[20]);
+  EXPECT_EQ(value_names_[18], value_names_[19]);
+  EXPECT_NE(value_names_[23], value_names_[15]);
+  EXPECT_NE(value_names_[23], value_names_[18]);
+  EXPECT_EQ(value_names_[23], value_names_[20]);
+  EXPECT_EQ(value_names_[23], value_names_[24]);
+
+  EXPECT_NE(value_names_[26], value_names_[28]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, AliasingArrays) {
+  static const MIRDef mirs[] = {
+      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
+      DEF_AGET(3, Instruction::AGET_WIDE, 0u, 100u, 101u),
+      DEF_AGET(4, Instruction::AGET_WIDE, 1u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(5, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
+
+      DEF_AGET(3, Instruction::AGET_BYTE, 3u, 200u, 201u),
+      DEF_AGET(4, Instruction::AGET_BYTE, 4u, 200u, 201u),  // Differs from top...
+      DEF_APUT(4, Instruction::APUT_BYTE, 5u, 200u, 201u),  // Because of this APUT.
+      DEF_AGET(5, Instruction::AGET_BYTE, 6u, 200u, 201u),  // Differs from top and the loop AGET.
+
+      DEF_AGET(3, Instruction::AGET, 7u, 300u, 301u),
+      DEF_APUT(4, Instruction::APUT, 8u, 300u, 301u),   // Because of this APUT...
+      DEF_AGET(4, Instruction::AGET, 9u, 300u, 301u),   // Differs from top.
+      DEF_AGET(5, Instruction::AGET, 10u, 300u, 301u),  // Differs from top but == the loop AGET.
+
+      DEF_CONST(3, Instruction::CONST, 11u, 3000),
+      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 401u),
+      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 402u),
+      DEF_AGET(4, Instruction::AGET_CHAR, 14u, 400u, 401u),  // Differs from 11u and 16u.
+      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 402u),  // Same as 14u.
+      DEF_CONST(4, Instruction::CONST, 16u, 4000),
+      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 401u),
+      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 402u),
+      DEF_AGET(5, Instruction::AGET_CHAR, 19u, 400u, 401u),  // Differs from 11u and 14u...
+      DEF_AGET(5, Instruction::AGET_CHAR, 20u, 400u, 402u),  // and same as the CONST 16u.
+
+      DEF_AGET(3, Instruction::AGET_SHORT, 21u, 500u, 501u),
+      DEF_APUT(4, Instruction::APUT_SHORT, 22u, 500u, 502u),  // Clobbers element at index 501u.
+      DEF_AGET(5, Instruction::AGET_SHORT, 23u, 500u, 501u),  // Differs from the top.
+
+      DEF_AGET(3, Instruction::AGET_OBJECT, 24u, 600u, 601u),
+      DEF_APUT(4, Instruction::APUT_OBJECT, 25u, 601u, 602u),  // Clobbers 600u/601u.
+      DEF_AGET(5, Instruction::AGET_OBJECT, 26u, 600u, 601u),  // Differs from the top.
+
+      DEF_AGET(3, Instruction::AGET_BOOLEAN, 27u, 700u, 701u),
+      DEF_APUT(4, Instruction::APUT_BOOLEAN, 27u, 701u, 702u),  // Storing the same value.
+      DEF_AGET(5, Instruction::AGET_BOOLEAN, 29u, 700u, 701u),  // Same as the top.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  EXPECT_NE(value_names_[3], value_names_[4]);
+  EXPECT_NE(value_names_[3], value_names_[6]);
+  EXPECT_NE(value_names_[4], value_names_[6]);
+
+  EXPECT_NE(value_names_[7], value_names_[9]);
+  EXPECT_EQ(value_names_[9], value_names_[10]);
+
+  EXPECT_NE(value_names_[14], value_names_[11]);
+  EXPECT_NE(value_names_[14], value_names_[16]);
+  EXPECT_EQ(value_names_[14], value_names_[15]);
+  EXPECT_NE(value_names_[19], value_names_[11]);
+  EXPECT_NE(value_names_[19], value_names_[14]);
+  EXPECT_EQ(value_names_[19], value_names_[16]);
+  EXPECT_EQ(value_names_[19], value_names_[20]);
+
+  EXPECT_NE(value_names_[21], value_names_[23]);
+
+  EXPECT_NE(value_names_[24], value_names_[26]);
+
+  EXPECT_EQ(value_names_[27], value_names_[29]);
+}
+
+TEST_F(GlobalValueNumberingTestLoop, Phi) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_PHI2(4, 1u, 0u, 6u),                     // Merge CONST 0u (1000) with the same.
+      DEF_PHI2(4, 2u, 0u, 7u),                     // Merge CONST 0u (1000) with the Phi itself.
+      DEF_PHI2(4, 3u, 0u, 8u),                     // Merge CONST 0u (1000) and CONST 4u (2000).
+      DEF_PHI2(4, 4u, 0u, 9u),                     // Merge CONST 0u (1000) and Phi 3u.
+      DEF_CONST(4, Instruction::CONST, 5u, 2000),
+      DEF_MOVE(4, Instruction::MOVE, 6u, 0u),
+      DEF_MOVE(4, Instruction::MOVE, 7u, 2u),
+      DEF_MOVE(4, Instruction::MOVE, 8u, 5u),
+      DEF_MOVE(4, Instruction::MOVE, 9u, 3u),
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[1], value_names_[0]);
+  EXPECT_EQ(value_names_[2], value_names_[0]);
+
+  EXPECT_NE(value_names_[3], value_names_[0]);
+  EXPECT_NE(value_names_[3], value_names_[5]);
+  EXPECT_NE(value_names_[4], value_names_[0]);
+  EXPECT_NE(value_names_[4], value_names_[5]);
+  EXPECT_NE(value_names_[4], value_names_[3]);
+}
+
+TEST_F(GlobalValueNumberingTestCatch, IFields) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },
+      { 1u, 1u, 1u, false },
+  };
+  static const MIRDef mirs[] = {
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 201u),
+      DEF_IGET(3, Instruction::IGET, 2u, 100u, 0u),
+      DEF_IGET(3, Instruction::IGET, 3u, 200u, 0u),
+      DEF_IGET(3, Instruction::IGET, 4u, 201u, 0u),
+      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 201u),     // Clobbering catch, 201u escapes.
+      DEF_IGET(4, Instruction::IGET, 6u, 100u, 0u),         // Differs from IGET 2u.
+      DEF_IPUT(4, Instruction::IPUT, 6u, 100u, 1u),
+      DEF_IPUT(4, Instruction::IPUT, 6u, 101u, 0u),
+      DEF_IPUT(4, Instruction::IPUT, 6u, 200u, 0u),
+      DEF_IGET(5, Instruction::IGET, 10u, 100u, 0u),        // Differs from IGETs 2u and 6u.
+      DEF_IGET(5, Instruction::IGET, 11u, 200u, 0u),        // Same as the top.
+      DEF_IGET(5, Instruction::IGET, 12u, 201u, 0u),        // Differs from the top, 201u escaped.
+      DEF_IPUT(5, Instruction::IPUT, 10u, 100u, 1u),
+      DEF_IPUT(5, Instruction::IPUT, 10u, 101u, 0u),
+      DEF_IPUT(5, Instruction::IPUT, 10u, 200u, 0u),
+      DEF_IGET(6, Instruction::IGET, 16u, 100u, 0u),        // Differs from IGETs 2u, 6u and 10u.
+      DEF_IGET(6, Instruction::IGET, 17u, 100u, 1u),        // Same as IGET 16u.
+      DEF_IGET(6, Instruction::IGET, 18u, 101u, 0u),        // Same as IGET 16u.
+      DEF_IGET(6, Instruction::IGET, 19u, 200u, 0u),        // Same as IGET 16u.
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[2], value_names_[6]);
+  EXPECT_NE(value_names_[2], value_names_[10]);
+  EXPECT_NE(value_names_[6], value_names_[10]);
+  EXPECT_EQ(value_names_[3], value_names_[11]);
+  EXPECT_NE(value_names_[4], value_names_[12]);
+
+  EXPECT_NE(value_names_[2], value_names_[16]);
+  EXPECT_NE(value_names_[6], value_names_[16]);
+  EXPECT_NE(value_names_[10], value_names_[16]);
+  EXPECT_EQ(value_names_[16], value_names_[17]);
+  EXPECT_EQ(value_names_[16], value_names_[18]);
+  EXPECT_EQ(value_names_[16], value_names_[19]);
+}
+
+TEST_F(GlobalValueNumberingTestCatch, SFields) {
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },
+      { 1u, 1u, 1u, false },
+  };
+  static const MIRDef mirs[] = {
+      DEF_SGET(3, Instruction::SGET, 0u, 0u),
+      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),     // Clobbering catch.
+      DEF_SGET(4, Instruction::SGET, 2u, 0u),               // Differs from SGET 0u.
+      DEF_SPUT(4, Instruction::SPUT, 2u, 1u),
+      DEF_SGET(5, Instruction::SGET, 4u, 0u),               // Differs from SGETs 0u and 2u.
+      DEF_SPUT(5, Instruction::SPUT, 4u, 1u),
+      DEF_SGET(6, Instruction::SGET, 6u, 0u),               // Differs from SGETs 0u, 2u and 4u.
+      DEF_SGET(6, Instruction::SGET, 7u, 1u),               // Same as the SGET 6u.
+  };
+
+  PrepareSFields(sfields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[2]);
+  EXPECT_NE(value_names_[0], value_names_[4]);
+  EXPECT_NE(value_names_[2], value_names_[4]);
+  EXPECT_NE(value_names_[0], value_names_[6]);
+  EXPECT_NE(value_names_[2], value_names_[6]);
+  EXPECT_NE(value_names_[4], value_names_[6]);
+  EXPECT_EQ(value_names_[6], value_names_[7]);
+}
+
+TEST_F(GlobalValueNumberingTestCatch, Arrays) {
+  static const MIRDef mirs[] = {
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 201u),
+      DEF_AGET(3, Instruction::AGET, 2u, 100u, 101u),
+      DEF_AGET(3, Instruction::AGET, 3u, 200u, 202u),
+      DEF_AGET(3, Instruction::AGET, 4u, 200u, 203u),
+      DEF_AGET(3, Instruction::AGET, 5u, 201u, 202u),
+      DEF_AGET(3, Instruction::AGET, 6u, 201u, 203u),
+      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 201u),     // Clobbering catch, 201u escapes.
+      DEF_AGET(4, Instruction::AGET, 8u, 100u, 101u),       // Differs from AGET 2u.
+      DEF_APUT(4, Instruction::APUT, 8u, 100u, 102u),
+      DEF_APUT(4, Instruction::APUT, 8u, 200u, 202u),
+      DEF_APUT(4, Instruction::APUT, 8u, 200u, 203u),
+      DEF_APUT(4, Instruction::APUT, 8u, 201u, 202u),
+      DEF_APUT(4, Instruction::APUT, 8u, 201u, 203u),
+      DEF_AGET(5, Instruction::AGET, 14u, 100u, 101u),      // Differs from AGETs 2u and 8u.
+      DEF_AGET(5, Instruction::AGET, 15u, 200u, 202u),      // Same as AGET 3u.
+      DEF_AGET(5, Instruction::AGET, 16u, 200u, 203u),      // Same as AGET 4u.
+      DEF_AGET(5, Instruction::AGET, 17u, 201u, 202u),      // Differs from AGET 5u.
+      DEF_AGET(5, Instruction::AGET, 18u, 201u, 203u),      // Differs from AGET 6u.
+      DEF_APUT(5, Instruction::APUT, 14u, 100u, 102u),
+      DEF_APUT(5, Instruction::APUT, 14u, 200u, 202u),
+      DEF_APUT(5, Instruction::APUT, 14u, 200u, 203u),
+      DEF_APUT(5, Instruction::APUT, 14u, 201u, 202u),
+      DEF_APUT(5, Instruction::APUT, 14u, 201u, 203u),
+      DEF_AGET(6, Instruction::AGET, 24u, 100u, 101u),      // Differs from AGETs 2u, 8u and 14u.
+      DEF_AGET(6, Instruction::AGET, 25u, 100u, 101u),      // Same as AGET 24u.
+      DEF_AGET(6, Instruction::AGET, 26u, 200u, 202u),      // Same as AGET 24u.
+      DEF_AGET(6, Instruction::AGET, 27u, 200u, 203u),      // Same as AGET 24u.
+      DEF_AGET(6, Instruction::AGET, 28u, 201u, 202u),      // Same as AGET 24u.
+      DEF_AGET(6, Instruction::AGET, 29u, 201u, 203u),      // Same as AGET 24u.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[2], value_names_[8]);
+  EXPECT_NE(value_names_[2], value_names_[14]);
+  EXPECT_NE(value_names_[8], value_names_[14]);
+  EXPECT_EQ(value_names_[3], value_names_[15]);
+  EXPECT_EQ(value_names_[4], value_names_[16]);
+  EXPECT_NE(value_names_[5], value_names_[17]);
+  EXPECT_NE(value_names_[6], value_names_[18]);
+  EXPECT_NE(value_names_[2], value_names_[24]);
+  EXPECT_NE(value_names_[8], value_names_[24]);
+  EXPECT_NE(value_names_[14], value_names_[24]);
+  EXPECT_EQ(value_names_[24], value_names_[25]);
+  EXPECT_EQ(value_names_[24], value_names_[26]);
+  EXPECT_EQ(value_names_[24], value_names_[27]);
+  EXPECT_EQ(value_names_[24], value_names_[28]);
+  EXPECT_EQ(value_names_[24], value_names_[29]);
+}
+
+TEST_F(GlobalValueNumberingTestCatch, Phi) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_CONST(3, Instruction::CONST, 1u, 2000),
+      DEF_MOVE(3, Instruction::MOVE, 2u, 1u),
+      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),     // Clobbering catch.
+      DEF_CONST(5, Instruction::CONST, 4u, 1000),
+      DEF_CONST(5, Instruction::CONST, 5u, 3000),
+      DEF_MOVE(5, Instruction::MOVE, 6u, 5u),
+      DEF_PHI2(6, 7u, 0u, 4u),
+      DEF_PHI2(6, 8u, 0u, 5u),
+      DEF_PHI2(6, 9u, 0u, 6u),
+      DEF_PHI2(6, 10u, 1u, 4u),
+      DEF_PHI2(6, 11u, 1u, 5u),
+      DEF_PHI2(6, 12u, 1u, 6u),
+      DEF_PHI2(6, 13u, 2u, 4u),
+      DEF_PHI2(6, 14u, 2u, 5u),
+      DEF_PHI2(6, 15u, 2u, 6u),
+  };
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  ASSERT_EQ(value_names_[4], value_names_[0]);  // Both CONSTs are 1000.
+  EXPECT_EQ(value_names_[7], value_names_[0]);  // Merging CONST 0u and CONST 4u, both 1000.
+  EXPECT_NE(value_names_[8], value_names_[0]);
+  EXPECT_NE(value_names_[8], value_names_[5]);
+  EXPECT_EQ(value_names_[9], value_names_[8]);
+  EXPECT_NE(value_names_[10], value_names_[1]);
+  EXPECT_NE(value_names_[10], value_names_[4]);
+  EXPECT_NE(value_names_[10], value_names_[8]);
+  EXPECT_NE(value_names_[11], value_names_[1]);
+  EXPECT_NE(value_names_[11], value_names_[5]);
+  EXPECT_NE(value_names_[11], value_names_[8]);
+  EXPECT_NE(value_names_[11], value_names_[10]);
+  EXPECT_EQ(value_names_[12], value_names_[11]);
+  EXPECT_EQ(value_names_[13], value_names_[10]);
+  EXPECT_EQ(value_names_[14], value_names_[11]);
+  EXPECT_EQ(value_names_[15], value_names_[11]);
+}
+
+TEST_F(GlobalValueNumberingTest, NullCheckIFields) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Object.
+      { 1u, 1u, 1u, false },  // Object.
+  };
+  static const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
+  };
+  static const MIRDef mirs[] = {
+      DEF_IGET(3, Instruction::IGET_OBJECT, 0u, 100u, 0u),
+      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 100u, 1u),
+      DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 101u, 0u),
+      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for field #0 for taken.
+      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 4u),
+      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 100u, 0u),
+      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 100u, 1u),
+      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 101u, 0u),
+      DEF_IGET(5, Instruction::IGET_OBJECT, 8u, 100u, 0u),   // 100u/#0, IF_NEZ/NEW_ARRAY.
+      DEF_IGET(5, Instruction::IGET_OBJECT, 9u, 100u, 1u),   // 100u/#1, -/NEW_ARRAY.
+      DEF_IGET(5, Instruction::IGET_OBJECT, 10u, 101u, 0u),  // 101u/#0, -/NEW_ARRAY.
+      DEF_CONST(5, Instruction::CONST, 11u, 0),
+      DEF_AGET(5, Instruction::AGET, 12u, 8u, 11u),   // Null-check eliminated.
+      DEF_AGET(5, Instruction::AGET, 13u, 9u, 11u),   // Null-check kept.
+      DEF_AGET(5, Instruction::AGET, 14u, 10u, 11u),  // Null-check kept.
+  };
+  static const bool expected_ignore_null_check[] = {
+      false, true, false, false,                      // BB #3; unimportant.
+      false, true, true, true,                        // BB #4; unimportant.
+      true, true, true, false, true, false, false,    // BB #5; only the last three are important.
+  };
+
+  PrepareIFields(ifields);
+  PrepareBasicBlocks(bbs);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(expected_ignore_null_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTest, NullCheckSFields) {
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },  // Object.
+      { 1u, 1u, 1u, false },  // Object.
+  };
+  static const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
+  };
+  static const MIRDef mirs[] = {
+      DEF_SGET(3, Instruction::SGET_OBJECT, 0u, 0u),
+      DEF_SGET(3, Instruction::SGET_OBJECT, 1u, 1u),
+      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for field #0 for taken.
+      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 3u),
+      DEF_SPUT(4, Instruction::SPUT_OBJECT, 3u, 0u),
+      DEF_SPUT(4, Instruction::SPUT_OBJECT, 3u, 1u),
+      DEF_SGET(5, Instruction::SGET_OBJECT, 6u, 0u),  // Field #0 is null-checked, IF_NEZ/NEW_ARRAY.
+      DEF_SGET(5, Instruction::SGET_OBJECT, 7u, 1u),  // Field #1 is not null-checked, -/NEW_ARRAY.
+      DEF_CONST(5, Instruction::CONST, 8u, 0),
+      DEF_AGET(5, Instruction::AGET, 9u, 6u, 8u),     // Null-check eliminated.
+      DEF_AGET(5, Instruction::AGET, 10u, 7u, 8u),    // Null-check kept.
+  };
+  static const bool expected_ignore_null_check[] = {
+      false, false, false, false, false, false, false, false, false, true, false
+  };
+
+  PrepareSFields(sfields);
+  PrepareBasicBlocks(bbs);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(expected_ignore_null_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTest, NullCheckArrays) {
+  static const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
+  };
+  static const MIRDef mirs[] = {
+      DEF_AGET(3, Instruction::AGET_OBJECT, 0u, 100u, 102u),
+      DEF_AGET(3, Instruction::AGET_OBJECT, 1u, 100u, 103u),
+      DEF_AGET(3, Instruction::AGET_OBJECT, 2u, 101u, 102u),
+      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for value 0u for taken.
+      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 4u),
+      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 100u, 102u),
+      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 100u, 103u),
+      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 101u, 102u),
+      DEF_AGET(5, Instruction::AGET_OBJECT, 8u, 100u, 102u),   // Null-checked, IF_NEZ/NEW_ARRAY.
+      DEF_AGET(5, Instruction::AGET_OBJECT, 9u, 100u, 103u),   // Not null-checked, -/NEW_ARRAY.
+      DEF_AGET(5, Instruction::AGET_OBJECT, 10u, 101u, 102u),  // Not null-checked, -/NEW_ARRAY.
+      DEF_CONST(5, Instruction::CONST, 11u, 0),
+      DEF_AGET(5, Instruction::AGET, 12u, 8u, 11u),    // Null-check eliminated.
+      DEF_AGET(5, Instruction::AGET, 13u, 9u, 11u),    // Null-check kept.
+      DEF_AGET(5, Instruction::AGET, 14u, 10u, 11u),   // Null-check kept.
+  };
+  static const bool expected_ignore_null_check[] = {
+      false, true, false, false,                      // BB #3; unimportant.
+      false, true, true, true,                        // BB #4; unimportant.
+      true, true, true, false, true, false, false,    // BB #5; only the last three are important.
+  };
+
+  PrepareBasicBlocks(bbs);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(expected_ignore_null_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, RangeCheckArrays) {
+  // NOTE: We don't merge range checks when we merge value names for Phis or memory locations.
+  static const MIRDef mirs[] = {
+      DEF_AGET(4, Instruction::AGET, 0u, 100u, 101u),
+      DEF_AGET(5, Instruction::AGET, 1u, 100u, 101u),
+      DEF_APUT(6, Instruction::APUT, 2u, 100u, 101u),
+
+      DEF_AGET(4, Instruction::AGET, 3u, 200u, 201u),
+      DEF_AGET(5, Instruction::AGET, 4u, 200u, 202u),
+      DEF_APUT(6, Instruction::APUT, 5u, 200u, 201u),
+
+      DEF_AGET(4, Instruction::AGET, 6u, 300u, 302u),
+      DEF_AGET(5, Instruction::AGET, 7u, 301u, 302u),
+      DEF_APUT(6, Instruction::APUT, 8u, 300u, 302u),
+  };
+  static const bool expected_ignore_null_check[] = {
+      false, false, true,
+      false, false, true,
+      false, false, false,
+  };
+  static const bool expected_ignore_range_check[] = {
+      false, false, true,
+      false, false, false,
+      false, false, false,
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
+  ASSERT_EQ(arraysize(expected_ignore_range_check), mir_count_);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(expected_ignore_null_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
+    EXPECT_EQ(expected_ignore_range_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTestDiamond, MergeSameValueInDifferentMemoryLocations) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+  };
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+      { 1u, 1u, 1u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
+      DEF_CONST(4, Instruction::CONST, 2u, 1000),
+      DEF_IPUT(4, Instruction::IPUT, 2u, 100u, 0u),
+      DEF_IPUT(4, Instruction::IPUT, 2u, 100u, 1u),
+      DEF_IPUT(4, Instruction::IPUT, 2u, 101u, 0u),
+      DEF_APUT(4, Instruction::APUT, 2u, 200u, 202u),
+      DEF_APUT(4, Instruction::APUT, 2u, 200u, 203u),
+      DEF_APUT(4, Instruction::APUT, 2u, 201u, 202u),
+      DEF_APUT(4, Instruction::APUT, 2u, 201u, 203u),
+      DEF_SPUT(4, Instruction::SPUT, 2u, 0u),
+      DEF_SPUT(4, Instruction::SPUT, 2u, 1u),
+      DEF_CONST(5, Instruction::CONST, 12u, 2000),
+      DEF_IPUT(5, Instruction::IPUT, 12u, 100u, 0u),
+      DEF_IPUT(5, Instruction::IPUT, 12u, 100u, 1u),
+      DEF_IPUT(5, Instruction::IPUT, 12u, 101u, 0u),
+      DEF_APUT(5, Instruction::APUT, 12u, 200u, 202u),
+      DEF_APUT(5, Instruction::APUT, 12u, 200u, 203u),
+      DEF_APUT(5, Instruction::APUT, 12u, 201u, 202u),
+      DEF_APUT(5, Instruction::APUT, 12u, 201u, 203u),
+      DEF_SPUT(5, Instruction::SPUT, 12u, 0u),
+      DEF_SPUT(5, Instruction::SPUT, 12u, 1u),
+      DEF_PHI2(6, 22u, 2u, 12u),
+      DEF_IGET(6, Instruction::IGET, 23u, 100u, 0u),
+      DEF_IGET(6, Instruction::IGET, 24u, 100u, 1u),
+      DEF_IGET(6, Instruction::IGET, 25u, 101u, 0u),
+      DEF_AGET(6, Instruction::AGET, 26u, 200u, 202u),
+      DEF_AGET(6, Instruction::AGET, 27u, 200u, 203u),
+      DEF_AGET(6, Instruction::AGET, 28u, 201u, 202u),
+      DEF_AGET(6, Instruction::AGET, 29u, 201u, 203u),
+      DEF_SGET(6, Instruction::SGET, 30u, 0u),
+      DEF_SGET(6, Instruction::SGET, 31u, 1u),
+  };
+  PrepareIFields(ifields);
+  PrepareSFields(sfields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[2], value_names_[12]);
+  EXPECT_NE(value_names_[2], value_names_[22]);
+  EXPECT_NE(value_names_[12], value_names_[22]);
+  for (size_t i = 23; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(value_names_[22], value_names_[i]) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTest, InfiniteLocationLoop) {
+  // This is a pattern that led to an infinite loop during the GVN development. This has been
+  // fixed by rewriting the merging of AliasingValues to merge only locations read from or
+  // written to in each incoming LVN rather than merging all locations read from or written to
+  // in any incoming LVN. It also showed up only when the GVN used the DFS ordering instead of
+  // the "topological" ordering but, since the "topological" ordering is not really topological
+  // when there are cycles and an optimizing Java compiler (or a tool like proguard) could
+  // theoretically create any sort of flow graph, this could have shown up in real code.
+  //
+  // While we were merging all the locations:
+  // The first time, the Phi evaluates to the same value name as the CONST 0u. After the second
+  // evaluation, when BB #9 has been processed, the Phi receives its own value name.
+  // However, the index from the first evaluation keeps disappearing and reappearing in the
+  // LVN's aliasing_array_value_map_'s load_value_map for BBs #9, #4, #5, #7 because of the
+  // DFS ordering of LVN evaluation.
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Object.
+  };
+  static const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 2), DEF_PRED2(3, 9)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED1(4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED3(6, 7, 8)),
+  };
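+  // A sketch of the CFG encoded above (derived from the successor lists): 3 -> 4,
+  // 4 -> { 5, exit }, 5 -> { 6, 7 }, 6 -> 9, 7 -> { 8, 9 }, 8 -> 9, plus the back edge 9 -> 4,
+  // i.e. a loop over BBs 4..9 whose body branches at 5 and 7 and re-merges at 9.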
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 0),
+      DEF_PHI2(4, 1u, 0u, 10u),
+      DEF_INVOKE1(6, Instruction::INVOKE_STATIC, 100u),
+      DEF_IGET(6, Instruction::IGET_OBJECT, 3u, 100u, 0u),
+      DEF_CONST(6, Instruction::CONST, 4u, 1000),
+      DEF_APUT(6, Instruction::APUT, 4u, 3u, 1u),            // Index is Phi 1u.
+      DEF_INVOKE1(8, Instruction::INVOKE_STATIC, 100u),
+      DEF_IGET(8, Instruction::IGET_OBJECT, 7u, 100u, 0u),
+      DEF_CONST(8, Instruction::CONST, 8u, 2000),
+      DEF_APUT(8, Instruction::APUT, 9u, 7u, 1u),            // Index is Phi 1u.
+      DEF_CONST(9, Instruction::CONST, 10u, 3000),
+  };
+  PrepareIFields(ifields);
+  PrepareBasicBlocks(bbs);
+  PrepareMIRs(mirs);
+  // Use DFS order for this test. The GVN result should not depend on the ordering used
+  // once the GVN actually converges, but creating a test for this convergence issue with
+  // the topological ordering would be a very challenging task.
+  PerformPreOrderDfsGVN();
+}
+
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_IFieldAndPhi) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
+      DEF_IPUT(3, Instruction::IPUT_OBJECT, 0u, 200u, 0u),
+      DEF_PHI2(4, 2u, 0u, 3u),
+      DEF_MOVE(5, Instruction::MOVE_OBJECT, 3u, 300u),
+      DEF_IPUT(5, Instruction::IPUT_OBJECT, 3u, 200u, 0u),
+      DEF_MOVE(6, Instruction::MOVE_OBJECT, 5u, 2u),
+      DEF_IGET(6, Instruction::IGET_OBJECT, 6u, 200u, 0u),
+      DEF_MOVE(7, Instruction::MOVE_OBJECT, 7u, 5u),
+      DEF_IGET(7, Instruction::IGET_OBJECT, 8u, 200u, 0u),
+      DEF_MOVE(8, Instruction::MOVE_OBJECT, 9u, 5u),
+      DEF_IGET(8, Instruction::IGET_OBJECT, 10u, 200u, 0u),
+      DEF_MOVE(9, Instruction::MOVE_OBJECT, 11u, 5u),
+      DEF_IGET(9, Instruction::IGET_OBJECT, 12u, 200u, 0u),
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[3]);
+  EXPECT_NE(value_names_[0], value_names_[2]);
+  EXPECT_NE(value_names_[3], value_names_[2]);
+  EXPECT_EQ(value_names_[2], value_names_[5]);
+  EXPECT_EQ(value_names_[5], value_names_[6]);
+  EXPECT_EQ(value_names_[5], value_names_[7]);
+  EXPECT_EQ(value_names_[5], value_names_[8]);
+  EXPECT_EQ(value_names_[5], value_names_[9]);
+  EXPECT_EQ(value_names_[5], value_names_[10]);
+  EXPECT_EQ(value_names_[5], value_names_[11]);
+  EXPECT_EQ(value_names_[5], value_names_[12]);
+}
+
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+  };
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
+      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 200u, 0u),
+      DEF_SGET(3, Instruction::SGET_OBJECT, 2u, 0u),
+      DEF_AGET(3, Instruction::AGET_OBJECT, 3u, 300u, 201u),
+      DEF_PHI2(4, 4u, 0u, 8u),
+      DEF_IGET(5, Instruction::IGET_OBJECT, 5u, 200u, 0u),
+      DEF_SGET(5, Instruction::SGET_OBJECT, 6u, 0u),
+      DEF_AGET(5, Instruction::AGET_OBJECT, 7u, 300u, 201u),
+      DEF_MOVE(5, Instruction::MOVE_OBJECT, 8u, 400u),
+      DEF_IPUT(5, Instruction::IPUT_OBJECT, 4u, 200u, 0u),          // PUT the Phi 4u.
+      DEF_SPUT(5, Instruction::SPUT_OBJECT, 4u, 0u),                // PUT the Phi 4u.
+      DEF_APUT(5, Instruction::APUT_OBJECT, 4u, 300u, 201u),        // PUT the Phi 4u.
+      DEF_MOVE(6, Instruction::MOVE_OBJECT, 12u, 4u),
+      DEF_IGET(6, Instruction::IGET_OBJECT, 13u, 200u, 0u),
+      DEF_SGET(6, Instruction::SGET_OBJECT, 14u, 0u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 15u, 300u, 201u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 16u, 12u, 600u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 17u, 13u, 600u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 18u, 14u, 600u),
+      DEF_AGET(6, Instruction::AGET_OBJECT, 19u, 15u, 600u),
+      DEF_MOVE(8, Instruction::MOVE_OBJECT, 20u, 12u),
+      DEF_IGET(8, Instruction::IGET_OBJECT, 21u, 200u, 0u),
+      DEF_SGET(8, Instruction::SGET_OBJECT, 22u, 0u),
+      DEF_AGET(8, Instruction::AGET_OBJECT, 23u, 300u, 201u),
+      DEF_AGET(8, Instruction::AGET_OBJECT, 24u, 12u, 600u),
+      DEF_AGET(8, Instruction::AGET_OBJECT, 25u, 13u, 600u),
+      DEF_AGET(8, Instruction::AGET_OBJECT, 26u, 14u, 600u),
+      DEF_AGET(8, Instruction::AGET_OBJECT, 27u, 15u, 600u),
+      DEF_MOVE(9, Instruction::MOVE_OBJECT, 28u, 12u),
+      DEF_IGET(9, Instruction::IGET_OBJECT, 29u, 200u, 0u),
+      DEF_SGET(9, Instruction::SGET_OBJECT, 30u, 0u),
+      DEF_AGET(9, Instruction::AGET_OBJECT, 31u, 300u, 201u),
+      DEF_AGET(9, Instruction::AGET_OBJECT, 32u, 12u, 600u),
+      DEF_AGET(9, Instruction::AGET_OBJECT, 33u, 13u, 600u),
+      DEF_AGET(9, Instruction::AGET_OBJECT, 34u, 14u, 600u),
+      DEF_AGET(9, Instruction::AGET_OBJECT, 35u, 15u, 600u),
+  };
+  static const bool expected_ignore_null_check[] = {
+      false, false, false, false,                                   // BB #3.
+      false, true, false, true, false, true, false, true,           // BBs #4 and #5.
+      false, true, false, true, false, false, false, false,         // BB #6.
+      false, true, false, true, true, true, true, true,             // BB #8.
+      false, true, false, true, true, true, true, true,             // BB #9.
+  };
+  static const bool expected_ignore_range_check[] = {
+      false, false, false, false,                                   // BB #3.
+      false, false, false, true, false, false, false, true,         // BBs #4 and #5.
+      false, false, false, true, false, false, false, false,        // BB #6.
+      false, false, false, true, true, true, true, true,            // BB #8.
+      false, false, false, true, true, true, true, true,            // BB #9.
+  };
+
+  PrepareIFields(ifields);
+  PrepareSFields(sfields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[4]);
+  EXPECT_NE(value_names_[1], value_names_[5]);
+  EXPECT_NE(value_names_[2], value_names_[6]);
+  EXPECT_NE(value_names_[3], value_names_[7]);
+  EXPECT_NE(value_names_[4], value_names_[8]);
+  EXPECT_NE(value_names_[0], value_names_[12]);
+  EXPECT_NE(value_names_[1], value_names_[13]);
+  EXPECT_NE(value_names_[2], value_names_[14]);
+  EXPECT_NE(value_names_[3], value_names_[15]);
+  EXPECT_EQ(value_names_[4], value_names_[12]);
+  EXPECT_NE(value_names_[5], value_names_[13]);
+  EXPECT_NE(value_names_[6], value_names_[14]);
+  EXPECT_NE(value_names_[7], value_names_[15]);
+  EXPECT_EQ(value_names_[12], value_names_[20]);
+  EXPECT_EQ(value_names_[13], value_names_[21]);
+  EXPECT_EQ(value_names_[14], value_names_[22]);
+  EXPECT_EQ(value_names_[15], value_names_[23]);
+  EXPECT_EQ(value_names_[12], value_names_[28]);
+  EXPECT_EQ(value_names_[13], value_names_[29]);
+  EXPECT_EQ(value_names_[14], value_names_[30]);
+  EXPECT_EQ(value_names_[15], value_names_[31]);
+  PerformGVNCodeModifications();
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(expected_ignore_null_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
+    EXPECT_EQ(expected_ignore_range_check[i],
+              (mirs_[i].optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTestTwoNestedLoops, DISABLED_IFieldAndPhi) {
+  static const IFieldDef ifields[] = {
+      { 0u, 1u, 0u, false },  // Int.
+  };
+  static const MIRDef mirs[] = {
+      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
+      DEF_IPUT(3, Instruction::IPUT_OBJECT, 0u, 200u, 0u),
+      DEF_PHI2(4, 2u, 0u, 11u),
+      DEF_MOVE(4, Instruction::MOVE_OBJECT, 3u, 2u),
+      DEF_IGET(4, Instruction::IGET_OBJECT, 4u, 200u, 0u),
+      DEF_MOVE(5, Instruction::MOVE_OBJECT, 5u, 3u),
+      DEF_IGET(5, Instruction::IGET_OBJECT, 6u, 200u, 0u),
+      DEF_MOVE(6, Instruction::MOVE_OBJECT, 7u, 3u),
+      DEF_IGET(6, Instruction::IGET_OBJECT, 8u, 200u, 0u),
+      DEF_MOVE(7, Instruction::MOVE_OBJECT, 9u, 3u),
+      DEF_IGET(7, Instruction::IGET_OBJECT, 10u, 200u, 0u),
+      DEF_MOVE(7, Instruction::MOVE_OBJECT, 11u, 300u),
+      DEF_IPUT(7, Instruction::IPUT_OBJECT, 11u, 200u, 0u),
+      DEF_MOVE(8, Instruction::MOVE_OBJECT, 13u, 3u),
+      DEF_IGET(8, Instruction::IGET_OBJECT, 14u, 200u, 0u),
+  };
+
+  PrepareIFields(ifields);
+  PrepareMIRs(mirs);
+  PerformGVN();
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_NE(value_names_[0], value_names_[11]);
+  EXPECT_NE(value_names_[0], value_names_[2]);
+  EXPECT_NE(value_names_[11], value_names_[2]);
+  EXPECT_EQ(value_names_[2], value_names_[3]);
+  EXPECT_EQ(value_names_[3], value_names_[4]);
+  EXPECT_EQ(value_names_[3], value_names_[5]);
+  EXPECT_EQ(value_names_[3], value_names_[6]);
+  EXPECT_EQ(value_names_[3], value_names_[7]);
+  EXPECT_EQ(value_names_[3], value_names_[8]);
+  EXPECT_EQ(value_names_[3], value_names_[9]);
+  EXPECT_EQ(value_names_[3], value_names_[10]);
+  EXPECT_EQ(value_names_[3], value_names_[13]);
+  EXPECT_EQ(value_names_[3], value_names_[14]);
+}
+
+}  // namespace art
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 6259496..d5fd6fe 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -16,6 +16,7 @@
 
 #include "local_value_numbering.h"
 
+#include "global_value_numbering.h"
 #include "mir_field_info.h"
 #include "mir_graph.h"
 
@@ -24,89 +25,925 @@
 namespace {  // anonymous namespace
 
 // Operations used for value map keys instead of actual opcode.
-static constexpr uint16_t kInvokeMemoryVersionBumpOp = Instruction::INVOKE_DIRECT;
-static constexpr uint16_t kUnresolvedSFieldOp = Instruction::SPUT;
-static constexpr uint16_t kResolvedSFieldOp = Instruction::SGET;
-static constexpr uint16_t kUnresolvedIFieldOp = Instruction::IPUT;
-static constexpr uint16_t kNonAliasingIFieldOp = Instruction::IGET;
-static constexpr uint16_t kAliasingIFieldOp = Instruction::IGET_WIDE;
-static constexpr uint16_t kAliasingIFieldStartVersionOp = Instruction::IGET_WIDE;
-static constexpr uint16_t kAliasingIFieldBumpVersionOp = Instruction::IGET_OBJECT;
-static constexpr uint16_t kArrayAccessLocOp = Instruction::APUT;
+static constexpr uint16_t kInvokeMemoryVersionBumpOp = Instruction::INVOKE_VIRTUAL;
+static constexpr uint16_t kUnresolvedSFieldOp = Instruction::SGET;
+static constexpr uint16_t kResolvedSFieldOp = Instruction::SGET_WIDE;
+static constexpr uint16_t kUnresolvedIFieldOp = Instruction::IGET;
+static constexpr uint16_t kNonAliasingIFieldLocOp = Instruction::IGET_WIDE;
+static constexpr uint16_t kNonAliasingIFieldInitialOp = Instruction::IGET_OBJECT;
+static constexpr uint16_t kAliasingIFieldOp = Instruction::IGET_BOOLEAN;
+static constexpr uint16_t kAliasingIFieldStartVersionOp = Instruction::IGET_BYTE;
+static constexpr uint16_t kAliasingIFieldBumpVersionOp = Instruction::IGET_CHAR;
 static constexpr uint16_t kNonAliasingArrayOp = Instruction::AGET;
 static constexpr uint16_t kNonAliasingArrayStartVersionOp = Instruction::AGET_WIDE;
-static constexpr uint16_t kAliasingArrayOp = Instruction::AGET_OBJECT;
-static constexpr uint16_t kAliasingArrayMemoryVersionOp = Instruction::AGET_BOOLEAN;
-static constexpr uint16_t kAliasingArrayBumpVersionOp = Instruction::AGET_BYTE;
+static constexpr uint16_t kNonAliasingArrayBumpVersionOp = Instruction::AGET_OBJECT;
+static constexpr uint16_t kAliasingArrayOp = Instruction::AGET_BOOLEAN;
+static constexpr uint16_t kAliasingArrayStartVersionOp = Instruction::AGET_BYTE;
+static constexpr uint16_t kAliasingArrayBumpVersionOp = Instruction::AGET_CHAR;
+static constexpr uint16_t kMergeBlockMemoryVersionBumpOp = Instruction::INVOKE_VIRTUAL_RANGE;
+static constexpr uint16_t kMergeBlockAliasingIFieldVersionBumpOp = Instruction::IPUT;
+static constexpr uint16_t kMergeBlockAliasingIFieldMergeLocationOp = Instruction::IPUT_WIDE;
+static constexpr uint16_t kMergeBlockNonAliasingArrayVersionBumpOp = Instruction::APUT;
+static constexpr uint16_t kMergeBlockNonAliasingArrayMergeLocationOp = Instruction::APUT_WIDE;
+static constexpr uint16_t kMergeBlockAliasingArrayVersionBumpOp = Instruction::APUT_OBJECT;
+static constexpr uint16_t kMergeBlockAliasingArrayMergeLocationOp = Instruction::APUT_BOOLEAN;
+static constexpr uint16_t kMergeBlockNonAliasingIFieldVersionBumpOp = Instruction::APUT_BYTE;
+static constexpr uint16_t kMergeBlockSFieldVersionBumpOp = Instruction::APUT_CHAR;
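+
+// The stand-in opcodes above are all distinct from one another, presumably so that the value-map
+// keys built for the different bookkeeping purposes cannot collide with each other.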
 
 }  // anonymous namespace
 
-LocalValueNumbering::LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator)
-    : cu_(cu),
-      last_value_(0u),
-      sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      value_map_(std::less<uint64_t>(), allocator->Adapter()),
-      global_memory_version_(0u),
-      aliasing_ifield_version_map_(std::less<uint16_t>(), allocator->Adapter()),
-      non_aliasing_array_version_map_(std::less<uint16_t>(), allocator->Adapter()),
-      field_index_map_(FieldReferenceComparator(), allocator->Adapter()),
-      non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()),
-      non_aliasing_ifields_(NonAliasingIFieldKeyComparator(), allocator->Adapter()),
-      escaped_array_refs_(EscapedArrayKeyComparator(), allocator->Adapter()),
-      range_checked_(RangeCheckKeyComparator() , allocator->Adapter()),
-      null_checked_(std::less<uint16_t>(), allocator->Adapter()) {
-  std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u);
-  std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u);
-  std::fill_n(aliasing_array_version_, kFieldTypeCount, 0u);
+class LocalValueNumbering::AliasingIFieldVersions {
+ public:
+  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                     uint16_t field_id) {
+    uint16_t type = gvn->GetFieldType(field_id);
+    return gvn->LookupValue(kAliasingIFieldStartVersionOp, field_id,
+                            lvn->global_memory_version_, lvn->unresolved_ifield_version_[type]);
+  }
+
+  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
+                                    uint16_t store_ref_set_id, uint16_t stored_value) {
+    return gvn->LookupValue(kAliasingIFieldBumpVersionOp, old_version,
+                            store_ref_set_id, stored_value);
+  }
+
+  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
+                                    uint16_t field_id, uint16_t base, uint16_t memory_version) {
+    return gvn->LookupValue(kAliasingIFieldOp, field_id, base, memory_version);
+  }
+
+  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                   uint16_t field_id, uint16_t base) {
+    // If the base/field_id is non-aliasing in lvn, use the non-aliasing value.
+    uint16_t type = gvn->GetFieldType(field_id);
+    if (lvn->IsNonAliasingIField(base, field_id, type)) {
+      uint16_t loc = gvn->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
+      auto lb = lvn->non_aliasing_ifield_value_map_.find(loc);
+      return (lb != lvn->non_aliasing_ifield_value_map_.end())
+          ? lb->second
+          : gvn->LookupValue(kNonAliasingIFieldInitialOp, loc, kNoValue, kNoValue);
+    }
+    return AliasingValuesMergeGet<AliasingIFieldVersions>(
+        gvn, lvn, &lvn->aliasing_ifield_value_map_, field_id, base);
+  }
+
+  static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                uint16_t field_id) {
+    uint16_t type = gvn->GetFieldType(field_id);
+    return lvn->unresolved_ifield_version_[type] == lvn->merge_new_memory_version_ ||
+        lvn->global_memory_version_ == lvn->merge_new_memory_version_;
+  }
+
+  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                        uint16_t field_id) {
+    return gvn->LookupValue(kMergeBlockAliasingIFieldVersionBumpOp, field_id, kNoValue, lvn_id);
+  }
+
+  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                           uint16_t field_id, uint16_t base) {
+    return gvn->LookupValue(kMergeBlockAliasingIFieldMergeLocationOp, field_id, base, lvn_id);
+  }
+};
+
+class LocalValueNumbering::NonAliasingArrayVersions {
+ public:
+  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                     uint16_t array) {
+    return gvn->LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, kNoValue);
+  }
+
+  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
+                                    uint16_t store_ref_set_id, uint16_t stored_value) {
+    return gvn->LookupValue(kNonAliasingArrayBumpVersionOp, old_version,
+                            store_ref_set_id, stored_value);
+  }
+
+  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
+                                    uint16_t array, uint16_t index, uint16_t memory_version) {
+    return gvn->LookupValue(kNonAliasingArrayOp, array, index, memory_version);
+  }
+
+  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                   uint16_t array, uint16_t index) {
+    return AliasingValuesMergeGet<NonAliasingArrayVersions>(
+        gvn, lvn, &lvn->non_aliasing_array_value_map_, array, index);
+  }
+
+  static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                uint16_t array) {
+    return false;  // Not affected by global_memory_version_.
+  }
+
+  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                        uint16_t array) {
+    return gvn->LookupValue(kMergeBlockNonAliasingArrayVersionBumpOp, array, kNoValue, lvn_id);
+  }
+
+  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                           uint16_t array, uint16_t index) {
+    return gvn->LookupValue(kMergeBlockNonAliasingArrayMergeLocationOp, array, index, lvn_id);
+  }
+};
+
+class LocalValueNumbering::AliasingArrayVersions {
+ public:
+  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                     uint16_t type) {
+    return gvn->LookupValue(kAliasingArrayStartVersionOp, type, lvn->global_memory_version_,
+                            kNoValue);
+  }
+
+  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
+                                    uint16_t store_ref_set_id, uint16_t stored_value) {
+    return gvn->LookupValue(kAliasingArrayBumpVersionOp, old_version,
+                            store_ref_set_id, stored_value);
+  }
+
+  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
+                                    uint16_t type, uint16_t location, uint16_t memory_version) {
+    return gvn->LookupValue(kAliasingArrayOp, type, location, memory_version);
+  }
+
+  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                   uint16_t type, uint16_t location) {
+    // If the location is non-aliasing in lvn, use the non-aliasing value.
+    uint16_t array = gvn->GetArrayLocationBase(location);
+    if (lvn->IsNonAliasingArray(array, type)) {
+      uint16_t index = gvn->GetArrayLocationIndex(location);
+      return NonAliasingArrayVersions::LookupMergeValue(gvn, lvn, array, index);
+    }
+    return AliasingValuesMergeGet<AliasingArrayVersions>(
+        gvn, lvn, &lvn->aliasing_array_value_map_, type, location);
+  }
+
+  static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
+                                uint16_t type) {
+    return lvn->global_memory_version_ == lvn->merge_new_memory_version_;
+  }
+
+  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                        uint16_t type) {
+    return gvn->LookupValue(kMergeBlockAliasingArrayVersionBumpOp, type, kNoValue, lvn_id);
+  }
+
+  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
+                                           uint16_t type, uint16_t location) {
+    return gvn->LookupValue(kMergeBlockAliasingArrayMergeLocationOp, type, location, lvn_id);
+  }
+};
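+
+// Note: the three *Versions classes above are policy types for the templated helpers below
+// (AliasingValuesMergeGet, HandleAliasingValuesGet/Put). As a rough sketch of the intended use
+// (illustrative only), a load through the aliasing ifield map would be resolved as
+//   uint16_t value = HandleAliasingValuesGet<AliasingIFieldVersions>(
+//       &aliasing_ifield_value_map_, field_id, base);
+// with the policy supplying the start/bump/lookup rules for that kind of memory location.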
+
+template <typename Map>
+LocalValueNumbering::AliasingValues* LocalValueNumbering::GetAliasingValues(
+    Map* map, const typename Map::key_type& key) {
+  auto lb = map->lower_bound(key);
+  if (lb == map->end() || map->key_comp()(key, lb->first)) {
+    map->PutBefore(lb, key, AliasingValues(gvn_->allocator_));
+    // The new entry was inserted before lb.
+    DCHECK(lb != map->begin());
+    --lb;
+    DCHECK(!map->key_comp()(lb->first, key) && !map->key_comp()(key, lb->first));
+  }
+  return &lb->second;
 }
 
-uint16_t LocalValueNumbering::GetFieldId(const MirFieldInfo& field_info) {
-  FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex() };
-  auto it = field_index_map_.find(key);
-  if (it != field_index_map_.end()) {
-    return it->second;
+template <typename Versions, typename KeyType>
+void LocalValueNumbering::UpdateAliasingValuesLoadVersion(const KeyType& key,
+                                                          AliasingValues* values) {
+  if (values->last_load_memory_version == kNoValue) {
+    // Get the start version that accounts for aliasing with unresolved fields of the same
+    // type and make it unique for the field by including the field_id.
+    uint16_t memory_version = values->memory_version_before_stores;
+    if (memory_version == kNoValue) {
+      memory_version = Versions::StartMemoryVersion(gvn_, this, key);
+    }
+    if (!values->store_loc_set.empty()) {
+      uint16_t ref_set_id = gvn_->GetRefSetId(values->store_loc_set);
+      memory_version = Versions::BumpMemoryVersion(gvn_, memory_version, ref_set_id,
+                                                   values->last_stored_value);
+    }
+    values->last_load_memory_version = memory_version;
   }
-  uint16_t id = field_index_map_.size();
-  field_index_map_.Put(key, id);
-  return id;
+}
+
+template <typename Versions, typename Map>
+uint16_t LocalValueNumbering::AliasingValuesMergeGet(GlobalValueNumbering* gvn,
+                                                     const LocalValueNumbering* lvn,
+                                                     Map* map, const typename Map::key_type& key,
+                                                     uint16_t location) {
+  // Retrieve the value name that we would get from
+  //   const_cast<LocalValueNumbering*>(lvn)->HandleAliasingValuesGet(map, key, location)
+  // but don't modify the map.
+  uint16_t value_name;
+  auto it = map->find(key);
+  if (it == map->end()) {
+    uint16_t start_version = Versions::StartMemoryVersion(gvn, lvn, key);
+    value_name = Versions::LookupGlobalValue(gvn, key, location, start_version);
+  } else if (it->second.store_loc_set.count(location) != 0u) {
+    value_name = it->second.last_stored_value;
+  } else {
+    auto load_it = it->second.load_value_map.find(location);
+    if (load_it != it->second.load_value_map.end()) {
+      value_name = load_it->second;
+    } else {
+      value_name = Versions::LookupGlobalValue(gvn, key, location,
+                                               it->second.last_load_memory_version);
+    }
+  }
+  return value_name;
+}
+
+template <typename Versions, typename Map>
+uint16_t LocalValueNumbering::HandleAliasingValuesGet(Map* map, const typename Map::key_type& key,
+                                                      uint16_t location) {
+  // Retrieve the value name for IGET/SGET/AGET and update the map with the new value, if any.
+  uint16_t res;
+  AliasingValues* values = GetAliasingValues(map, key);
+  if (values->store_loc_set.count(location) != 0u) {
+    res = values->last_stored_value;
+  } else {
+    UpdateAliasingValuesLoadVersion<Versions>(key, values);
+    auto lb = values->load_value_map.lower_bound(location);
+    if (lb != values->load_value_map.end() && lb->first == location) {
+      res = lb->second;
+    } else {
+      res = Versions::LookupGlobalValue(gvn_, key, location, values->last_load_memory_version);
+      values->load_value_map.PutBefore(lb, location, res);
+    }
+  }
+  return res;
+}
+
+template <typename Versions, typename Map>
+bool LocalValueNumbering::HandleAliasingValuesPut(Map* map, const typename Map::key_type& key,
+                                                  uint16_t location, uint16_t value) {
+  AliasingValues* values = GetAliasingValues(map, key);
+  auto load_values_it = values->load_value_map.find(location);
+  if (load_values_it != values->load_value_map.end() && load_values_it->second == value) {
+    // This insn can be eliminated; it stores the same value that's already in that location.
+    return false;
+  }
+  if (value == values->last_stored_value) {
+    auto store_loc_lb = values->store_loc_set.lower_bound(location);
+    if (store_loc_lb != values->store_loc_set.end() && *store_loc_lb == location) {
+      // This insn can be eliminated; it stores the same value that's already in that location.
+      return false;
+    }
+    values->store_loc_set.emplace_hint(store_loc_lb, location);
+  } else {
+    UpdateAliasingValuesLoadVersion<Versions>(key, values);
+    values->memory_version_before_stores = values->last_load_memory_version;
+    values->last_stored_value = value;
+    values->store_loc_set.clear();
+    values->store_loc_set.insert(location);
+  }
+  // Clear the last load memory version and remove all potentially overwritten values.
+  values->last_load_memory_version = kNoValue;
+  auto it = values->load_value_map.begin(), end = values->load_value_map.end();
+  while (it != end) {
+    if (it->second == value) {
+      ++it;
+    } else {
+      it = values->load_value_map.erase(it);
+    }
+  }
+  return true;
+}
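+
+// Illustrative caller shape for the Put helper above (assumed, mirroring the Get helper): an
+// IPUT handler would do something like
+//   if (!HandleAliasingValuesPut<AliasingIFieldVersions>(&aliasing_ifield_value_map_,
+//                                                        field_id, base, value)) {
+//     // The store writes a value already known to be in that location, so it is redundant.
+//   }
+// i.e. a false return value marks the store as eliminable.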
+
+LocalValueNumbering::LocalValueNumbering(GlobalValueNumbering* gvn, uint16_t id)
+    : gvn_(gvn),
+      id_(id),
+      sreg_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      sreg_wide_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      sfield_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      non_aliasing_ifield_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      aliasing_ifield_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      non_aliasing_array_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      aliasing_array_value_map_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      global_memory_version_(0u),
+      non_aliasing_refs_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      escaped_refs_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      escaped_ifield_clobber_set_(EscapedIFieldClobberKeyComparator(), gvn->Allocator()->Adapter()),
+      escaped_array_clobber_set_(EscapedArrayClobberKeyComparator(), gvn->Allocator()->Adapter()),
+      range_checked_(RangeCheckKeyComparator(), gvn->Allocator()->Adapter()),
+      null_checked_(std::less<uint16_t>(), gvn->Allocator()->Adapter()),
+      merge_names_(gvn->Allocator()->Adapter()),
+      merge_map_(std::less<ScopedArenaVector<BasicBlockId>>(), gvn->Allocator()->Adapter()),
+      merge_new_memory_version_(kNoValue) {
+  std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u);
+  std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u);
+}
+
+bool LocalValueNumbering::Equals(const LocalValueNumbering& other) const {
+  DCHECK(gvn_ == other.gvn_);
+  // Compare the maps/sets and memory versions.
+  return sreg_value_map_ == other.sreg_value_map_ &&
+      sreg_wide_value_map_ == other.sreg_wide_value_map_ &&
+      sfield_value_map_ == other.sfield_value_map_ &&
+      non_aliasing_ifield_value_map_ == other.non_aliasing_ifield_value_map_ &&
+      aliasing_ifield_value_map_ == other.aliasing_ifield_value_map_ &&
+      non_aliasing_array_value_map_ == other.non_aliasing_array_value_map_ &&
+      aliasing_array_value_map_ == other.aliasing_array_value_map_ &&
+      SameMemoryVersion(other) &&
+      non_aliasing_refs_ == other.non_aliasing_refs_ &&
+      escaped_refs_ == other.escaped_refs_ &&
+      escaped_ifield_clobber_set_ == other.escaped_ifield_clobber_set_ &&
+      escaped_array_clobber_set_ == other.escaped_array_clobber_set_ &&
+      range_checked_ == other.range_checked_ &&
+      null_checked_ == other.null_checked_;
+}
+
+void LocalValueNumbering::MergeOne(const LocalValueNumbering& other, MergeType merge_type) {
+  sreg_value_map_ = other.sreg_value_map_;
+  sreg_wide_value_map_ = other.sreg_wide_value_map_;
+
+  if (merge_type == kReturnMerge) {
+    // RETURN or PHI+RETURN. We need only sreg value maps.
+    return;
+  }
+
+  non_aliasing_ifield_value_map_ = other.non_aliasing_ifield_value_map_;
+  non_aliasing_array_value_map_ = other.non_aliasing_array_value_map_;
+  non_aliasing_refs_ = other.non_aliasing_refs_;
+  range_checked_ = other.range_checked_;
+  null_checked_ = other.null_checked_;
+
+  if (merge_type == kCatchMerge) {
+    // Memory is clobbered. Use new memory version and don't merge aliasing locations.
+    global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
+    std::fill_n(unresolved_sfield_version_, kFieldTypeCount, global_memory_version_);
+    std::fill_n(unresolved_ifield_version_, kFieldTypeCount, global_memory_version_);
+    PruneNonAliasingRefsForCatch();
+    return;
+  }
+
+  DCHECK(merge_type == kNormalMerge);
+  global_memory_version_ = other.global_memory_version_;
+  std::copy_n(other.unresolved_ifield_version_, kFieldTypeCount, unresolved_ifield_version_);
+  std::copy_n(other.unresolved_sfield_version_, kFieldTypeCount, unresolved_sfield_version_);
+  sfield_value_map_ = other.sfield_value_map_;
+  aliasing_ifield_value_map_ = other.aliasing_ifield_value_map_;
+  aliasing_array_value_map_ = other.aliasing_array_value_map_;
+  escaped_refs_ = other.escaped_refs_;
+  escaped_ifield_clobber_set_ = other.escaped_ifield_clobber_set_;
+  escaped_array_clobber_set_ = other.escaped_array_clobber_set_;
+}
+
+bool LocalValueNumbering::SameMemoryVersion(const LocalValueNumbering& other) const {
+  return
+      global_memory_version_ == other.global_memory_version_ &&
+      std::equal(unresolved_ifield_version_, unresolved_ifield_version_ + kFieldTypeCount,
+                 other.unresolved_ifield_version_) &&
+      std::equal(unresolved_sfield_version_, unresolved_sfield_version_ + kFieldTypeCount,
+                 other.unresolved_sfield_version_);
+}
+
+uint16_t LocalValueNumbering::NewMemoryVersion(uint16_t* new_version) {
+  if (*new_version == kNoValue) {
+    *new_version = gvn_->LookupValue(kMergeBlockMemoryVersionBumpOp, 0u, 0u, id_);
+  }
+  return *new_version;
+}
+
+void LocalValueNumbering::MergeMemoryVersions(bool clobbered_catch) {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+  const LocalValueNumbering* cmp = gvn_->merge_lvns_[0];
+  // Check if the global version has changed.
+  bool new_global_version = clobbered_catch;
+  if (!new_global_version) {
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      if (lvn->global_memory_version_ != cmp->global_memory_version_) {
+        // Use a new version for everything.
+        new_global_version = true;
+        break;
+      }
+    }
+  }
+  if (new_global_version) {
+    global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
+    std::fill_n(unresolved_sfield_version_, kFieldTypeCount, merge_new_memory_version_);
+    std::fill_n(unresolved_ifield_version_, kFieldTypeCount, merge_new_memory_version_);
+  } else {
+    // Initialize with a copy of memory versions from the comparison LVN.
+    global_memory_version_ = cmp->global_memory_version_;
+    std::copy_n(cmp->unresolved_ifield_version_, kFieldTypeCount, unresolved_ifield_version_);
+    std::copy_n(cmp->unresolved_sfield_version_, kFieldTypeCount, unresolved_sfield_version_);
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      if (lvn == cmp) {
+        continue;
+      }
+      for (size_t i = 0; i != kFieldTypeCount; ++i) {
+        if (lvn->unresolved_ifield_version_[i] != cmp->unresolved_ifield_version_[i]) {
+          unresolved_ifield_version_[i] = NewMemoryVersion(&merge_new_memory_version_);
+        }
+        if (lvn->unresolved_sfield_version_[i] != cmp->unresolved_sfield_version_[i]) {
+          unresolved_sfield_version_[i] = NewMemoryVersion(&merge_new_memory_version_);
+        }
+      }
+    }
+  }
+}
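+
+// Example of the merge above (illustration only): if all predecessors agree on
+// global_memory_version_ and differ only in unresolved_ifield_version_ for a single field type,
+// the merged LVN copies every version from the comparison LVN and assigns the one lazily
+// created merge_new_memory_version_ to that single slot.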
+
+void LocalValueNumbering::PruneNonAliasingRefsForCatch() {
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    const BasicBlock* bb = gvn_->GetBasicBlock(lvn->Id());
+    DCHECK_EQ(bb->taken, kNullBlock);
+    DCHECK_NE(bb->fall_through, kNullBlock);
+    const BasicBlock* fall_through_bb = gvn_->GetBasicBlock(bb->fall_through);
+    const MIR* mir = fall_through_bb->first_mir_insn;
+    DCHECK(mir != nullptr);
+    // Only INVOKEs can leak and clobber non-aliasing references if they throw.
+    if ((Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke) != 0) {
+      for (uint16_t i = 0u; i != mir->ssa_rep->num_uses; ++i) {
+        uint16_t value_name = lvn->GetOperandValue(mir->ssa_rep->uses[i]);
+        non_aliasing_refs_.erase(value_name);
+      }
+    }
+  }
+}
+
+
+template <typename Set, Set LocalValueNumbering::* set_ptr>
+void LocalValueNumbering::IntersectSets() {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+
+  // Find the LVN with the fewest entries in the set.
+  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    if ((lvn->*set_ptr).size() < (least_entries_lvn->*set_ptr).size()) {
+      least_entries_lvn = lvn;
+    }
+  }
+
+  // For each key check if it's in all the LVNs.
+  for (const auto& key : least_entries_lvn->*set_ptr) {
+    bool checked = true;
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      if (lvn != least_entries_lvn && (lvn->*set_ptr).count(key) == 0u) {
+        checked = false;
+        break;
+      }
+    }
+    if (checked) {
+      (this->*set_ptr).emplace_hint((this->*set_ptr).end(), key);
+    }
+  }
+}
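+
+// For illustration: intersecting the sets {1, 2, 3}, {2, 3} and {3, 5} from three predecessors
+// keeps only {3}; starting from the smallest incoming set merely bounds the amount of work.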
+
+template <typename Map, Map LocalValueNumbering::* map_ptr>
+void LocalValueNumbering::IntersectMaps() {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+
+  // Find the LVN with the fewest entries in the map.
+  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    if ((lvn->*map_ptr).size() < (least_entries_lvn->*map_ptr).size()) {
+      least_entries_lvn = lvn;
+    }
+  }
+
+  // For each entry, check if the same key/value pair is in all the LVNs.
+  for (const auto& entry : least_entries_lvn->*map_ptr) {
+    bool checked = true;
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      if (lvn != least_entries_lvn) {
+        auto it = (lvn->*map_ptr).find(entry.first);
+        if (it == (lvn->*map_ptr).end() || !(it->second == entry.second)) {
+          checked = false;
+          break;
+        }
+      }
+    }
+    if (checked) {
+      (this->*map_ptr).PutBefore((this->*map_ptr).end(), entry.first, entry.second);
+    }
+  }
+}
+
+// Intersect maps as sets. The value type must be equality-comparable.
+template <typename Map>
+void LocalValueNumbering::InPlaceIntersectMaps(Map* work_map, const Map& other_map) {
+  auto work_it = work_map->begin(), work_end = work_map->end();
+  auto cmp = work_map->value_comp();
+  for (const auto& entry : other_map) {
+    // Drop entries that precede the current key or match the key with a different value.
+    while (work_it != work_end &&
+        (cmp(*work_it, entry) ||
+         (!cmp(entry, *work_it) && !(work_it->second == entry.second)))) {
+      work_it = work_map->erase(work_it);
+    }
+    if (work_it != work_end && !cmp(entry, *work_it)) {
+      // Same key and equal value; keep the entry.
+      ++work_it;
+    }
+  }
+  // Entries past the last key of other_map are not in the intersection.
+  while (work_it != work_end) {
+    work_it = work_map->erase(work_it);
+  }
+}
+
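+// Merge a set or map from all the LVNs being merged: walk this LVN's container in parallel with
+// each predecessor's and call MergeFn for every entry whose key is not in this container yet,
+// passing an insertion hint that remains valid across the inserts.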
+template <typename Set, Set LocalValueNumbering::*set_ptr, void (LocalValueNumbering::*MergeFn)(
+    const typename Set::value_type& entry, typename Set::iterator hint)>
+void LocalValueNumbering::MergeSets() {
+  auto cmp = (this->*set_ptr).value_comp();
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    auto my_it = (this->*set_ptr).begin(), my_end = (this->*set_ptr).end();
+    for (const auto& entry : lvn->*set_ptr) {
+      while (my_it != my_end && cmp(*my_it, entry)) {
+        ++my_it;
+      }
+      if (my_it != my_end && !cmp(entry, *my_it)) {
+        // Already handled.
+        ++my_it;
+      } else {
+        // Merge the values for this entry.
+        (this->*MergeFn)(entry, my_it);  // my_it remains valid across inserts to std::set/SafeMap.
+      }
+    }
+  }
+}
+
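+// Restrict work_values->load_value_map to the locations that were loaded from or stored to
+// according to *values; the caller recomputes the value names for the remaining locations.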
+void LocalValueNumbering::IntersectAliasingValueLocations(AliasingValues* work_values,
+                                                          const AliasingValues* values) {
+  auto cmp = work_values->load_value_map.key_comp();
+  auto work_it = work_values->load_value_map.begin(), work_end = work_values->load_value_map.end();
+  auto store_it = values->store_loc_set.begin(), store_end = values->store_loc_set.end();
+  auto load_it = values->load_value_map.begin(), load_end = values->load_value_map.end();
+  while (store_it != store_end || load_it != load_end) {
+    uint16_t loc;
+    if (store_it != store_end && (load_it == load_end || *store_it < load_it->first)) {
+      loc = *store_it;
+      ++store_it;
+    } else {
+      loc = load_it->first;
+      ++load_it;
+      DCHECK(store_it == store_end || cmp(loc, *store_it));
+    }
+    while (work_it != work_end && cmp(work_it->first, loc)) {
+      work_it = work_values->load_value_map.erase(work_it);
+    }
+    if (work_it != work_end && !cmp(loc, work_it->first)) {
+      // The location matches, keep it.
+      ++work_it;
+    }
+  }
+  while (work_it != work_end) {
+    work_it = work_values->load_value_map.erase(work_it);
+  }
+}
+
+void LocalValueNumbering::MergeEscapedRefs(const ValueNameSet::value_type& entry,
+                                           ValueNameSet::iterator hint) {
+  // See if the ref is either escaped or non-aliasing in each predecessor.
+  bool is_escaped = true;
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    if (lvn->non_aliasing_refs_.count(entry) == 0u &&
+        lvn->escaped_refs_.count(entry) == 0u) {
+      is_escaped = false;
+      break;
+    }
+  }
+  if (is_escaped) {
+    escaped_refs_.emplace_hint(hint, entry);
+  }
+}
+
+void LocalValueNumbering::MergeEscapedIFieldTypeClobberSets(
+    const EscapedIFieldClobberSet::value_type& entry, EscapedIFieldClobberSet::iterator hint) {
+  // Insert only type-clobber entries (field_id == kNoValue) of escaped refs.
+  if (entry.field_id == kNoValue && escaped_refs_.count(entry.base) != 0u) {
+    escaped_ifield_clobber_set_.emplace_hint(hint, entry);
+  }
+}
+
+void LocalValueNumbering::MergeEscapedIFieldClobberSets(
+    const EscapedIFieldClobberSet::value_type& entry, EscapedIFieldClobberSet::iterator hint) {
+  // Insert only those entries of escaped refs that are not overridden by a type clobber.
+  if (!(hint != escaped_ifield_clobber_set_.end() &&
+        hint->base == entry.base && hint->type == entry.type) &&
+      escaped_refs_.count(entry.base) != 0u) {
+    escaped_ifield_clobber_set_.emplace_hint(hint, entry);
+  }
+}
+
+void LocalValueNumbering::MergeEscapedArrayClobberSets(
+    const EscapedArrayClobberSet::value_type& entry, EscapedArrayClobberSet::iterator hint) {
+  if (escaped_refs_.count(entry.base) != 0u) {
+    escaped_array_clobber_set_.emplace_hint(hint, entry);
+  }
+}
+
+void LocalValueNumbering::MergeNullChecked(const ValueNameSet::value_type& entry,
+                                           ValueNameSet::iterator hint) {
+  // Merge null_checked_ for this ref.
+  merge_names_.clear();
+  merge_names_.resize(gvn_->merge_lvns_.size(), entry);
+  if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
+    null_checked_.insert(hint, entry);
+  }
+}
+
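+// Merge the value of a static field across predecessors: compute each predecessor's value as
+// HandleSGet() would, reuse it if they all agree, otherwise assign a merge value name.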
+void LocalValueNumbering::MergeSFieldValues(const SFieldToValueMap::value_type& entry,
+                                            SFieldToValueMap::iterator hint) {
+  uint16_t field_id = entry.first;
+  merge_names_.clear();
+  uint16_t value_name = kNoValue;
+  bool same_values = true;
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    // Get the value name as in HandleSGet() but don't modify *lvn.
+    auto it = lvn->sfield_value_map_.find(field_id);
+    if (it != lvn->sfield_value_map_.end()) {
+      value_name = it->second;
+    } else {
+      uint16_t type = gvn_->GetFieldType(field_id);
+      value_name = gvn_->LookupValue(kResolvedSFieldOp, field_id,
+                                     lvn->unresolved_sfield_version_[type],
+                                     lvn->global_memory_version_);
+    }
+
+    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
+    merge_names_.push_back(value_name);
+  }
+  if (same_values) {
+    // value_name already contains the result.
+  } else {
+    auto lb = merge_map_.lower_bound(merge_names_);
+    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
+      value_name = lb->second;
+    } else {
+      value_name = gvn_->LookupValue(kMergeBlockSFieldVersionBumpOp, field_id, id_, kNoValue);
+      merge_map_.PutBefore(lb, merge_names_, value_name);
+      if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
+        null_checked_.insert(value_name);
+      }
+    }
+  }
+  sfield_value_map_.PutBefore(hint, field_id, value_name);
+}
+
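+// Like MergeSFieldValues(), but for instance fields of non-aliasing references, keyed by the
+// field location.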
+void LocalValueNumbering::MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
+                                                       IFieldLocToValueMap::iterator hint) {
+  uint16_t field_loc = entry.first;
+  merge_names_.clear();
+  uint16_t value_name = kNoValue;
+  bool same_values = true;
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    // Get the value name as in HandleIGet() but don't modify *lvn.
+    auto it = lvn->non_aliasing_ifield_value_map_.find(field_loc);
+    if (it != lvn->non_aliasing_ifield_value_map_.end()) {
+      value_name = it->second;
+    } else {
+      value_name = gvn_->LookupValue(kNonAliasingIFieldInitialOp, field_loc, kNoValue, kNoValue);
+    }
+
+    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
+    merge_names_.push_back(value_name);
+  }
+  if (same_values) {
+    // value_name already contains the result.
+  } else {
+    auto lb = merge_map_.lower_bound(merge_names_);
+    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
+      value_name = lb->second;
+    } else {
+      value_name = gvn_->LookupValue(kMergeBlockNonAliasingIFieldVersionBumpOp, field_loc,
+                                     id_, kNoValue);
+      merge_map_.PutBefore(lb, merge_names_, value_name);
+      if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
+        null_checked_.insert(value_name);
+      }
+    }
+  }
+  non_aliasing_ifield_value_map_.PutBefore(hint, field_loc, value_name);
+}
+
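+// Merge the AliasingValues for one key (field id or array type): if all predecessors agree on
+// the memory version and stores, copy them and intersect the load values; otherwise bump the
+// memory version and recompute merged value names for locations tracked by all predecessors.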
+template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
+void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& entry,
+                                              typename Map::iterator hint) {
+  const typename Map::key_type& key = entry.first;
+
+  (this->*map_ptr).PutBefore(hint, key, AliasingValues(gvn_->allocator_));
+  DCHECK(hint != (this->*map_ptr).begin());
+  AliasingIFieldValuesMap::iterator it = hint;
+  --it;
+  DCHECK_EQ(it->first, key);
+  AliasingValues* my_values = &it->second;
+
+  const AliasingValues* cmp_values = nullptr;
+  bool same_version = !Versions::HasNewBaseVersion(gvn_, this, key);
+  uint16_t load_memory_version_for_same_version = kNoValue;
+  if (same_version) {
+    // Find the first LVN that has values for this key.
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      auto it = (lvn->*map_ptr).find(key);
+      if (it != (lvn->*map_ptr).end()) {
+        cmp_values = &it->second;
+        break;
+      }
+    }
+    DCHECK(cmp_values != nullptr);  // At least one LVN must have values for this key.
+
+    // Check if we have identical memory versions, i.e. the global memory version, unresolved
+    // field version and the values' memory_version_before_stores, last_stored_value
+    // and store_loc_set are identical.
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      auto it = (lvn->*map_ptr).find(key);
+      if (it == (lvn->*map_ptr).end()) {
+        if (cmp_values->memory_version_before_stores != kNoValue) {
+          same_version = false;
+          break;
+        }
+      } else if (cmp_values->last_stored_value != it->second.last_stored_value ||
+          cmp_values->memory_version_before_stores != it->second.memory_version_before_stores ||
+          cmp_values->store_loc_set != it->second.store_loc_set) {
+        same_version = false;
+        break;
+      } else if (it->second.last_load_memory_version != kNoValue) {
+        DCHECK(load_memory_version_for_same_version == kNoValue ||
+               load_memory_version_for_same_version == it->second.last_load_memory_version);
+        load_memory_version_for_same_version = it->second.last_load_memory_version;
+      }
+    }
+  }
+
+  if (same_version) {
+    // Copy the identical values.
+    my_values->memory_version_before_stores = cmp_values->memory_version_before_stores;
+    my_values->last_stored_value = cmp_values->last_stored_value;
+    my_values->store_loc_set = cmp_values->store_loc_set;
+    my_values->last_load_memory_version = load_memory_version_for_same_version;
+    // Merge load values seen in all incoming arcs (i.e. an intersection).
+    if (!cmp_values->load_value_map.empty()) {
+      my_values->load_value_map = cmp_values->load_value_map;
+      for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+        auto it = (lvn->*map_ptr).find(key);
+        if (it == (lvn->*map_ptr).end() || it->second.load_value_map.empty()) {
+          my_values->load_value_map.clear();
+          break;
+        }
+        InPlaceIntersectMaps(&my_values->load_value_map, it->second.load_value_map);
+        if (my_values->load_value_map.empty()) {
+          break;
+        }
+      }
+    }
+  } else {
+    // Bump version number for the merge.
+    my_values->memory_version_before_stores = my_values->last_load_memory_version =
+        Versions::LookupMergeBlockValue(gvn_, id_, key);
+
+    // Calculate the locations that have been either read from or written to in each incoming LVN.
+    bool first_lvn = true;
+    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+      auto it = (lvn->*map_ptr).find(key);
+      if (it == (lvn->*map_ptr).end()) {
+        my_values->load_value_map.clear();
+        break;
+      }
+      if (first_lvn) {
+        first_lvn = false;
+        // Copy the first LVN's locations. Values will be overwritten later.
+        my_values->load_value_map = it->second.load_value_map;
+        for (uint16_t location : it->second.store_loc_set) {
+          my_values->load_value_map.Put(location, 0u);
+        }
+      } else {
+        IntersectAliasingValueLocations(my_values, &it->second);
+      }
+    }
+    // Calculate merged values for the intersection.
+    for (auto& load_value_entry : my_values->load_value_map) {
+      uint16_t location = load_value_entry.first;
+      bool same_values = true;
+      uint16_t value_name = kNoValue;
+      merge_names_.clear();
+      for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+        value_name = Versions::LookupMergeValue(gvn_, lvn, key, location);
+        same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
+        merge_names_.push_back(value_name);
+      }
+      if (same_values) {
+        // value_name already contains the result.
+      } else {
+        auto lb = merge_map_.lower_bound(merge_names_);
+        if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
+          value_name = lb->second;
+        } else {
+          // NOTE: In addition to the key and id_ which don't change on an LVN recalculation
+          // during GVN, we also add location which can actually change on recalculation, so the
+          // value_name below may change. This could lead to an infinite loop if the location
+          // value name always changed when the referenced value name changed. However, given that
+          // we assign unique value names for other merges, such as Phis, such a dependency is
+          // not possible in a well-formed SSA graph.
+          value_name = Versions::LookupMergeLocationValue(gvn_, id_, key, location);
+          merge_map_.PutBefore(lb, merge_names_, value_name);
+          if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
+            null_checked_.insert(value_name);
+          }
+        }
+      }
+      load_value_entry.second = value_name;
+    }
+  }
+}
+
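+// Merge the LVN state of all predecessors in gvn_->merge_lvns_ into this LVN. The merge_type
+// selects how much is merged: sreg maps only for returns, no aliasing memory for catch entries.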
+void LocalValueNumbering::Merge(MergeType merge_type) {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+
+  IntersectMaps<SregValueMap, &LocalValueNumbering::sreg_value_map_>();
+  IntersectMaps<SregValueMap, &LocalValueNumbering::sreg_wide_value_map_>();
+  if (merge_type == kReturnMerge) {
+    // RETURN or PHI+RETURN. We need only sreg value maps.
+    return;
+  }
+
+  MergeMemoryVersions(merge_type == kCatchMerge);
+
+  // Merge non-aliasing maps/sets.
+  MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
+            &LocalValueNumbering::MergeNonAliasingIFieldValues>();
+  MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+            &LocalValueNumbering::MergeAliasingValues<
+                NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+                NonAliasingArrayVersions>>();
+  IntersectSets<ValueNameSet, &LocalValueNumbering::non_aliasing_refs_>();
+
+  // We won't do anything complicated for range checks; just calculate the intersection.
+  IntersectSets<RangeCheckSet, &LocalValueNumbering::range_checked_>();
+
+  // Merge null_checked_. We may later insert more, such as merged object field values.
+  MergeSets<ValueNameSet, &LocalValueNumbering::null_checked_,
+            &LocalValueNumbering::MergeNullChecked>();
+
+  if (merge_type == kCatchMerge) {
+    // Memory is clobbered. New memory version already created; don't merge aliasing locations.
+    PruneNonAliasingRefsForCatch();
+    return;
+  }
+
+  DCHECK(merge_type == kNormalMerge);
+
+  // Merge escaped refs and clobber sets.
+  MergeSets<ValueNameSet, &LocalValueNumbering::escaped_refs_,
+            &LocalValueNumbering::MergeEscapedRefs>();
+  if (!escaped_refs_.empty()) {
+    MergeSets<EscapedIFieldClobberSet, &LocalValueNumbering::escaped_ifield_clobber_set_,
+              &LocalValueNumbering::MergeEscapedIFieldTypeClobberSets>();
+    MergeSets<EscapedIFieldClobberSet, &LocalValueNumbering::escaped_ifield_clobber_set_,
+              &LocalValueNumbering::MergeEscapedIFieldClobberSets>();
+    MergeSets<EscapedArrayClobberSet, &LocalValueNumbering::escaped_array_clobber_set_,
+              &LocalValueNumbering::MergeEscapedArrayClobberSets>();
+  }
+
+  MergeSets<SFieldToValueMap, &LocalValueNumbering::sfield_value_map_,
+            &LocalValueNumbering::MergeSFieldValues>();
+  MergeSets<AliasingIFieldValuesMap, &LocalValueNumbering::aliasing_ifield_value_map_,
+            &LocalValueNumbering::MergeAliasingValues<
+                AliasingIFieldValuesMap, &LocalValueNumbering::aliasing_ifield_value_map_,
+                AliasingIFieldVersions>>();
+  MergeSets<AliasingArrayValuesMap, &LocalValueNumbering::aliasing_array_value_map_,
+            &LocalValueNumbering::MergeAliasingValues<
+                AliasingArrayValuesMap, &LocalValueNumbering::aliasing_array_value_map_,
+                AliasingArrayVersions>>();
 }
 
 uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) {
   uint16_t res = GetOperandValue(mir->ssa_rep->defs[0]);
-  SetOperandValue(mir->ssa_rep->defs[0], res);
   DCHECK(null_checked_.find(res) == null_checked_.end());
   null_checked_.insert(res);
   non_aliasing_refs_.insert(res);
   return res;
 }
 
-bool LocalValueNumbering::IsNonAliasing(uint16_t reg) {
+bool LocalValueNumbering::IsNonAliasing(uint16_t reg) const {
   return non_aliasing_refs_.find(reg) != non_aliasing_refs_.end();
 }
 
-bool LocalValueNumbering::IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type) {
+bool LocalValueNumbering::IsNonAliasingIField(uint16_t reg, uint16_t field_id,
+                                              uint16_t type) const {
   if (IsNonAliasing(reg)) {
     return true;
   }
-  NonAliasingIFieldKey key = { reg, field_id, type };
-  return non_aliasing_ifields_.count(key) != 0u;
+  if (escaped_refs_.find(reg) == escaped_refs_.end()) {
+    return false;
+  }
+  // Check for IPUTs to unresolved fields.
+  EscapedIFieldClobberKey key1 = { reg, type, kNoValue };
+  if (escaped_ifield_clobber_set_.find(key1) != escaped_ifield_clobber_set_.end()) {
+    return false;
+  }
+  // Check for aliased IPUTs to the same field.
+  EscapedIFieldClobberKey key2 = { reg, type, field_id };
+  return escaped_ifield_clobber_set_.find(key2) == escaped_ifield_clobber_set_.end();
 }
 
-bool LocalValueNumbering::IsNonAliasingArray(uint16_t reg, uint16_t type) {
+bool LocalValueNumbering::IsNonAliasingArray(uint16_t reg, uint16_t type) const {
   if (IsNonAliasing(reg)) {
     return true;
   }
-  EscapedArrayKey key = { reg, type };
-  return escaped_array_refs_.count(key) != 0u;
+  if (escaped_refs_.count(reg) == 0u) {
+    return false;
+  }
+  // Check for aliased APUTs.
+  EscapedArrayClobberKey key = { reg, type };
+  return escaped_array_clobber_set_.find(key) == escaped_array_clobber_set_.end();
 }
 
-
 void LocalValueNumbering::HandleNullCheck(MIR* mir, uint16_t reg) {
   auto lb = null_checked_.lower_bound(reg);
   if (lb != null_checked_.end() && *lb == reg) {
-    if (LIKELY(Good())) {
-      if (cu_->verbose) {
+    if (LIKELY(gvn_->CanModify())) {
+      if (gvn_->GetCompilationUnit()->verbose) {
         LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset;
       }
       mir->optimization_flags |= MIR_IGNORE_NULL_CHECK;
@@ -120,8 +957,8 @@
   RangeCheckKey key = { array, index };
   auto lb = range_checked_.lower_bound(key);
   if (lb != range_checked_.end() && !RangeCheckKeyComparator()(key, *lb)) {
-    if (LIKELY(Good())) {
-      if (cu_->verbose) {
+    if (LIKELY(gvn_->CanModify())) {
+      if (gvn_->GetCompilationUnit()->verbose) {
         LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset;
       }
       mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
@@ -141,26 +978,72 @@
 void LocalValueNumbering::HandleEscapingRef(uint16_t base) {
   auto it = non_aliasing_refs_.find(base);
   if (it != non_aliasing_refs_.end()) {
-    uint64_t iget_key = BuildKey(Instruction::IGET, base, 0u, 0u);
-    for (auto iget_it = value_map_.lower_bound(iget_key), iget_end = value_map_.end();
-        iget_it != iget_end && EqualOpAndOperand1(iget_it->first, iget_key); ++iget_it) {
-      uint16_t field_id = ExtractOperand2(iget_it->first);
-      uint16_t type = ExtractModifier(iget_it->first);
-      NonAliasingIFieldKey key = { base, field_id, type };
-      non_aliasing_ifields_.insert(key);
-    }
-    uint64_t aget_key = BuildKey(kNonAliasingArrayStartVersionOp, base, 0u, 0u);
-    auto aget_it = value_map_.lower_bound(aget_key);
-    if (aget_it != value_map_.end() && EqualOpAndOperand1(aget_key, aget_it->first)) {
-      DCHECK_EQ(ExtractOperand2(aget_it->first), kNoValue);
-      uint16_t type = ExtractModifier(aget_it->first);
-      EscapedArrayKey key = { base, type };
-      escaped_array_refs_.insert(key);
-    }
     non_aliasing_refs_.erase(it);
+    escaped_refs_.insert(base);
   }
 }
 
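+// Compute the value name for a Phi by merging the incoming value names from the predecessor
+// LVNs: reuse the common name if they all agree, otherwise assign a merge value name.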
+uint16_t LocalValueNumbering::HandlePhi(MIR* mir) {
+  if (gvn_->merge_lvns_.empty()) {
+    // Running LVN without a full GVN?
+    return kNoValue;
+  }
+  int16_t num_uses = mir->ssa_rep->num_uses;
+  int32_t* uses = mir->ssa_rep->uses;
+  // Try to find out if this is merging wide regs.
+  if (mir->ssa_rep->defs[0] != 0 &&
+      sreg_wide_value_map_.count(mir->ssa_rep->defs[0] - 1) != 0u) {
+    // This is the high part of a wide reg. Ignore the Phi.
+    return kNoValue;
+  }
+  bool wide = false;
+  for (int16_t i = 0; i != num_uses; ++i) {
+    if (sreg_wide_value_map_.count(uses[i]) != 0u) {
+      wide = true;
+      break;
+    }
+  }
+  // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN.
+  uint16_t value_name = kNoValue;
+  merge_names_.clear();
+  BasicBlockId* incoming = mir->meta.phi_incoming;
+  int16_t pos = 0;
+  bool same_values = true;
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    DCHECK_LT(pos, mir->ssa_rep->num_uses);
+    while (incoming[pos] != lvn->Id()) {
+      ++pos;
+      DCHECK_LT(pos, mir->ssa_rep->num_uses);
+    }
+    int s_reg = uses[pos];
+    ++pos;
+    value_name = wide ? lvn->GetOperandValueWide(s_reg) : lvn->GetOperandValue(s_reg);
+
+    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
+    merge_names_.push_back(value_name);
+  }
+  if (same_values) {
+    // value_name already contains the result.
+  } else {
+    auto lb = merge_map_.lower_bound(merge_names_);
+    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
+      value_name = lb->second;
+    } else {
+      value_name = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
+      merge_map_.PutBefore(lb, merge_names_, value_name);
+      if (!wide && gvn_->NullCheckedInAllPredecessors(merge_names_)) {
+        null_checked_.insert(value_name);
+      }
+    }
+  }
+  if (wide) {
+    SetOperandValueWide(mir->ssa_rep->defs[0], value_name);
+  } else {
+    SetOperandValue(mir->ssa_rep->defs[0], value_name);
+  }
+  return value_name;
+}
+
 uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) {
   // uint16_t type = opcode - Instruction::AGET;
   uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]);
@@ -171,22 +1054,12 @@
   // Establish value number for loaded register.
   uint16_t res;
   if (IsNonAliasingArray(array, type)) {
-    // Get the start version that accounts for aliasing within the array (different index names).
-    uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type);
-    // Find the current version from the non_aliasing_array_version_map_.
-    uint16_t memory_version = start_version;
-    auto it = non_aliasing_array_version_map_.find(start_version);
-    if (it != non_aliasing_array_version_map_.end()) {
-      memory_version = it->second;
-    } else {
-      // Just use the start_version.
-    }
-    res = LookupValue(kNonAliasingArrayOp, array, index, memory_version);
+    res = HandleAliasingValuesGet<NonAliasingArrayVersions>(&non_aliasing_array_value_map_,
+                                                            array, index);
   } else {
-    // Get the memory version of aliased array accesses of this type.
-    uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_,
-                                          aliasing_array_version_[type], kNoValue);
-    res = LookupValue(kAliasingArrayOp, array, index, memory_version);
+    uint16_t location = gvn_->GetArrayLocation(array, index);
+    res = HandleAliasingValuesGet<AliasingArrayVersions>(&aliasing_array_value_map_,
+                                                         type, location);
   }
   if (opcode == Instruction::AGET_WIDE) {
     SetOperandValueWide(mir->ssa_rep->defs[0], res);
@@ -209,46 +1082,27 @@
                    ? GetOperandValueWide(mir->ssa_rep->uses[0])
                    : GetOperandValue(mir->ssa_rep->uses[0]);
   if (IsNonAliasing(array)) {
-    // Get the start version that accounts for aliasing within the array (different index values).
-    uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type);
-    auto it = non_aliasing_array_version_map_.find(start_version);
-    uint16_t memory_version = start_version;
-    if (it != non_aliasing_array_version_map_.end()) {
-      memory_version = it->second;
-    }
-    // We need to take 4 values (array, index, memory_version, value) into account for bumping
-    // the memory version but the key can take only 3. Merge array and index into a location.
-    uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue);
-    // Bump the version, adding to the chain.
-    memory_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version,
-                                 array_access_location, value);
-    non_aliasing_array_version_map_.Overwrite(start_version, memory_version);
-    StoreValue(kNonAliasingArrayOp, array, index, memory_version, value);
-  } else {
-    // Get the memory version based on global_memory_version_ and aliasing_array_version_[type].
-    uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_,
-                                          aliasing_array_version_[type], kNoValue);
-    if (HasValue(kAliasingArrayOp, array, index, memory_version, value)) {
+    bool put_is_live = HandleAliasingValuesPut<NonAliasingArrayVersions>(
+        &non_aliasing_array_value_map_, array, index, value);
+    if (!put_is_live) {
       // This APUT can be eliminated, it stores the same value that's already in the field.
       // TODO: Eliminate the APUT.
       return;
     }
-    // We need to take 4 values (array, index, memory_version, value) into account for bumping
-    // the memory version but the key can take only 3. Merge array and index into a location.
-    uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue);
-    // Bump the version, adding to the chain.
-    uint16_t bumped_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version,
-                                          array_access_location, value);
-    aliasing_array_version_[type] = bumped_version;
-    memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_,
-                                 bumped_version, kNoValue);
-    StoreValue(kAliasingArrayOp, array, index, memory_version, value);
+  } else {
+    uint16_t location = gvn_->GetArrayLocation(array, index);
+    bool put_is_live = HandleAliasingValuesPut<AliasingArrayVersions>(
+        &aliasing_array_value_map_, type, location, value);
+    if (!put_is_live) {
+      // This APUT can be eliminated, it stores the same value that's already in the field.
+      // TODO: Eliminate the APUT.
+      return;
+    }
 
-    // Clear escaped array refs for this type.
-    EscapedArrayKey array_key = { type, 0u };
-    auto it = escaped_array_refs_.lower_bound(array_key), end = escaped_array_refs_.end();
-    while (it != end && it->type == type) {
-      it = escaped_array_refs_.erase(it);
+    // Clobber all escaped array refs for this type.
+    for (uint16_t escaped_array : escaped_refs_) {
+      EscapedArrayClobberKey clobber_key = { escaped_array, type };
+      escaped_array_clobber_set_.insert(clobber_key);
     }
   }
 }
@@ -256,32 +1110,28 @@
 uint16_t LocalValueNumbering::HandleIGet(MIR* mir, uint16_t opcode) {
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]);
   HandleNullCheck(mir, base);
-  const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir);
+  const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
   uint16_t res;
   if (!field_info.IsResolved() || field_info.IsVolatile()) {
     // Volatile fields always get a new memory version; field id is irrelevant.
     // Unresolved fields may be volatile, so handle them as such to be safe.
     // Use result s_reg - will be unique.
-    res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
+    res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
     uint16_t type = opcode - Instruction::IGET;
-    uint16_t field_id = GetFieldId(field_info);
+    uint16_t field_id = gvn_->GetFieldId(field_info, type);
     if (IsNonAliasingIField(base, field_id, type)) {
-      res = LookupValue(kNonAliasingIFieldOp, base, field_id, type);
-    } else {
-      // Get the start version that accounts for aliasing with unresolved fields of the same type
-      // and make it unique for the field by including the field_id.
-      uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_,
-                                           unresolved_ifield_version_[type], field_id);
-      // Find the current version from the aliasing_ifield_version_map_.
-      uint16_t memory_version = start_version;
-      auto version_it = aliasing_ifield_version_map_.find(start_version);
-      if (version_it != aliasing_ifield_version_map_.end()) {
-        memory_version = version_it->second;
+      uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
+      auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
+      if (lb != non_aliasing_ifield_value_map_.end() && lb->first == loc) {
+        res = lb->second;
       } else {
-        // Just use the start_version.
+        res = gvn_->LookupValue(kNonAliasingIFieldInitialOp, loc, kNoValue, kNoValue);
+        non_aliasing_ifield_value_map_.PutBefore(lb, loc, res);
       }
-      res = LookupValue(kAliasingIFieldOp, base, field_id, memory_version);
+    } else {
+      res = HandleAliasingValuesGet<AliasingIFieldVersions>(&aliasing_ifield_value_map_,
+                                                            field_id, base);
     }
   }
   if (opcode == Instruction::IGET_WIDE) {
@@ -297,80 +1147,96 @@
   int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1;
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]);
   HandleNullCheck(mir, base);
-  const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir);
+  const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
     // Use mir->offset as modifier; without elaborate inlining, it will be unique.
     unresolved_ifield_version_[type] =
-        LookupValue(kUnresolvedIFieldOp, kNoValue, kNoValue, mir->offset);
+        gvn_->LookupValue(kUnresolvedIFieldOp, kNoValue, kNoValue, mir->offset);
 
-    // Treat fields of escaped references of the same type as potentially modified.
-    NonAliasingIFieldKey key = { type, 0u, 0u };  // lowest possible key of this type.
-    auto it = non_aliasing_ifields_.lower_bound(key), end = non_aliasing_ifields_.end();
-    while (it != end && it->type == type) {
-      it = non_aliasing_ifields_.erase(it);
+    // For simplicity, treat base as escaped now.
+    HandleEscapingRef(base);
+
+    // Clobber all fields of escaped references of the same type.
+    for (uint16_t escaped_ref : escaped_refs_) {
+      EscapedIFieldClobberKey clobber_key = { escaped_ref, type, kNoValue };
+      escaped_ifield_clobber_set_.insert(clobber_key);
+    }
+
+    // Aliasing fields of the same type may have been overwritten.
+    auto it = aliasing_ifield_value_map_.begin(), end = aliasing_ifield_value_map_.end();
+    while (it != end) {
+      if (gvn_->GetFieldType(it->first) != type) {
+        ++it;
+      } else {
+        it = aliasing_ifield_value_map_.erase(it);
+      }
     }
   } else if (field_info.IsVolatile()) {
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = GetFieldId(field_info);
+    uint16_t field_id = gvn_->GetFieldId(field_info, type);
     uint16_t value = (opcode == Instruction::IPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
     if (IsNonAliasing(base)) {
-      StoreValue(kNonAliasingIFieldOp, base, field_id, type, value);
-    } else {
-      // Get the start version that accounts for aliasing with unresolved fields of the same type
-      // and make it unique for the field by including the field_id.
-      uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_,
-                                           unresolved_ifield_version_[type], field_id);
-      // Find the old version from the aliasing_ifield_version_map_.
-      uint16_t old_version = start_version;
-      auto version_it = aliasing_ifield_version_map_.find(start_version);
-      if (version_it != aliasing_ifield_version_map_.end()) {
-        old_version = version_it->second;
+      uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
+      auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
+      if (lb != non_aliasing_ifield_value_map_.end() && lb->first == loc) {
+        if (lb->second == value) {
+          // This IPUT can be eliminated, it stores the same value that's already in the field.
+          // TODO: Eliminate the IPUT.
+          return;
+        }
+        lb->second = value;  // Overwrite.
+      } else {
+        non_aliasing_ifield_value_map_.PutBefore(lb, loc, value);
       }
-      // Check if the field currently contains the value, making this a NOP.
-      if (HasValue(kAliasingIFieldOp, base, field_id, old_version, value)) {
+    } else {
+      bool put_is_live = HandleAliasingValuesPut<AliasingIFieldVersions>(
+          &aliasing_ifield_value_map_, field_id, base, value);
+      if (!put_is_live) {
         // This IPUT can be eliminated, it stores the same value that's already in the field.
         // TODO: Eliminate the IPUT.
         return;
       }
-      // Bump the version, adding to the chain started by start_version.
-      uint16_t memory_version = LookupValue(kAliasingIFieldBumpVersionOp, old_version, base, value);
-      // Update the aliasing_ifield_version_map_ so that HandleIGet() can get the memory_version
-      // without knowing the values used to build the chain.
-      aliasing_ifield_version_map_.Overwrite(start_version, memory_version);
-      StoreValue(kAliasingIFieldOp, base, field_id, memory_version, value);
 
-      // Clear non-aliasing fields for this field_id.
-      NonAliasingIFieldKey field_key = { type, field_id, 0u };
-      auto it = non_aliasing_ifields_.lower_bound(field_key), end = non_aliasing_ifields_.end();
-      while (it != end && it->field_id == field_id) {
-        DCHECK_EQ(type, it->type);
-        it = non_aliasing_ifields_.erase(it);
+      // Clobber all fields of escaped references for this field.
+      for (uint16_t escaped_ref : escaped_refs_) {
+        EscapedIFieldClobberKey clobber_key = { escaped_ref, type, field_id };
+        escaped_ifield_clobber_set_.insert(clobber_key);
       }
     }
   }
 }
 
 uint16_t LocalValueNumbering::HandleSGet(MIR* mir, uint16_t opcode) {
-  const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir);
+  const MirSFieldLoweringInfo& field_info = gvn_->GetMirGraph()->GetSFieldLoweringInfo(mir);
+  if (!field_info.IsInitialized() && (mir->optimization_flags & MIR_IGNORE_CLINIT_CHECK) == 0) {
+    // Class initialization can call arbitrary functions, so we need to wipe aliasing values.
+    HandleInvokeOrClInit(mir);
+  }
   uint16_t res;
   if (!field_info.IsResolved() || field_info.IsVolatile()) {
     // Volatile fields always get a new memory version; field id is irrelevant.
     // Unresolved fields may be volatile, so handle them as such to be safe.
     // Use result s_reg - will be unique.
-    res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
+    res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
-    uint16_t field_id = GetFieldId(field_info);
-    // Resolved non-volatile static fields can alias with non-resolved fields of the same type,
-    // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_
-    // to determine the version of the field.
     uint16_t type = opcode - Instruction::SGET;
-    res = LookupValue(kResolvedSFieldOp, field_id,
-                      unresolved_sfield_version_[type], global_memory_version_);
+    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    auto lb = sfield_value_map_.lower_bound(field_id);
+    if (lb != sfield_value_map_.end() && lb->first == field_id) {
+      res = lb->second;
+    } else {
+      // Resolved non-volatile static fields can alias with non-resolved fields of the same type,
+      // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_
+      // to determine the version of the field.
+      res = gvn_->LookupValue(kResolvedSFieldOp, field_id,
+                              unresolved_sfield_version_[type], global_memory_version_);
+      sfield_value_map_.PutBefore(lb, field_id, res);
+    }
   }
   if (opcode == Instruction::SGET_WIDE) {
     SetOperandValueWide(mir->ssa_rep->defs[0], res);
@@ -381,30 +1247,68 @@
 }
 
 void LocalValueNumbering::HandleSPut(MIR* mir, uint16_t opcode) {
+  const MirSFieldLoweringInfo& field_info = gvn_->GetMirGraph()->GetSFieldLoweringInfo(mir);
+  if (!field_info.IsInitialized() && (mir->optimization_flags & MIR_IGNORE_CLINIT_CHECK) == 0) {
+    // Class initialization can call arbitrary functions, so we need to wipe aliasing values.
+    HandleInvokeOrClInit(mir);
+  }
   uint16_t type = opcode - Instruction::SPUT;
-  const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir);
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
     // Use mir->offset as modifier; without elaborate inlining, it will be unique.
     unresolved_sfield_version_[type] =
-        LookupValue(kUnresolvedSFieldOp, kNoValue, kNoValue, mir->offset);
+        gvn_->LookupValue(kUnresolvedSFieldOp, kNoValue, kNoValue, mir->offset);
+    RemoveSFieldsForType(type);
   } else if (field_info.IsVolatile()) {
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = GetFieldId(field_info);
+    uint16_t field_id = gvn_->GetFieldId(field_info, type);
     uint16_t value = (opcode == Instruction::SPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
     // Resolved non-volatile static fields can alias with non-resolved fields of the same type,
     // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_
     // to determine the version of the field.
-    uint16_t type = opcode - Instruction::SGET;
-    StoreValue(kResolvedSFieldOp, field_id,
-               unresolved_sfield_version_[type], global_memory_version_, value);
+    auto lb = sfield_value_map_.lower_bound(field_id);
+    if (lb != sfield_value_map_.end() && lb->first == field_id) {
+      if (lb->second == value) {
+        // This SPUT can be eliminated, it stores the same value that's already in the field.
+        // TODO: Eliminate the SPUT.
+        return;
+      }
+      lb->second = value;  // Overwrite.
+    } else {
+      sfield_value_map_.PutBefore(lb, field_id, value);
+    }
   }
 }
 
+void LocalValueNumbering::RemoveSFieldsForType(uint16_t type) {
+  // Erase all static fields of this type from the sfield_value_map_.
+  for (auto it = sfield_value_map_.begin(), end = sfield_value_map_.end(); it != end; ) {
+    if (gvn_->GetFieldType(it->first) == type) {
+      it = sfield_value_map_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
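+// Treat the instruction as a call into arbitrary code: bump the global memory version and drop
+// all static field, aliasing field/array and escaped reference tracking.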
+void LocalValueNumbering::HandleInvokeOrClInit(MIR* mir) {
+  // Use mir->offset as modifier; without elaborate inlining, it will be unique.
+  global_memory_version_ =
+      gvn_->LookupValue(kInvokeMemoryVersionBumpOp, 0u, 0u, mir->offset);
+  // All static fields, as well as instance fields and array elements of aliasing references
+  // (including escaped references), may have been modified.
+  sfield_value_map_.clear();
+  aliasing_ifield_value_map_.clear();
+  aliasing_array_value_map_.clear();
+  escaped_refs_.clear();
+  escaped_ifield_clobber_set_.clear();
+  escaped_array_clobber_set_.clear();
+}
+
 uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) {
   uint16_t res = kNoValue;
   uint16_t opcode = mir->dalvikInsn.opcode;
@@ -414,8 +1318,6 @@
     case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
     case Instruction::RETURN_WIDE:
-    case Instruction::MONITOR_ENTER:
-    case Instruction::MONITOR_EXIT:
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
@@ -444,12 +1346,42 @@
       // Nothing defined - take no action.
       break;
 
+    case Instruction::MONITOR_ENTER:
+      HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
+      // NOTE: Keeping all aliasing values intact. Programs that rely on loads/stores of the
+      // same non-volatile locations outside and inside a synchronized block being different
+      // contain races that we cannot fix.
+      break;
+
+    case Instruction::MONITOR_EXIT:
+      HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
+      // When running GVN and CanModify(), an uneliminated null check indicates a bytecode error.
+      if ((gvn_->cu_->disable_opt & (1 << kGlobalValueNumbering)) == 0 && gvn_->CanModify() &&
+          (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
+        LOG(WARNING) << "Bytecode error: MONITOR_EXIT is still null checked at 0x" << std::hex
+            << mir->offset << " in " << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file);
+      }
+      break;
+
     case Instruction::FILLED_NEW_ARRAY:
     case Instruction::FILLED_NEW_ARRAY_RANGE:
       // Nothing defined but the result will be unique and non-null.
       if (mir->next != nullptr && mir->next->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
-        MarkNonAliasingNonNull(mir->next);
-        // TUNING: We could track value names stored in the array.
+        uint16_t array = MarkNonAliasingNonNull(mir->next);
+        // Do not SetOperandValue(); we'll do that when we process the MOVE_RESULT_OBJECT.
+        if (kLocalValueNumberingEnableFilledNewArrayTracking && mir->ssa_rep->num_uses != 0u) {
+          AliasingValues* values = GetAliasingValues(&non_aliasing_array_value_map_, array);
+          // Clear the value if we got a merged version in a loop.
+          *values = AliasingValues(gvn_->allocator_);
+          for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
+            DCHECK_EQ(High16Bits(i), 0u);
+            uint16_t index = gvn_->LookupValue(Instruction::CONST, i, 0u, 0);
+            uint16_t value = GetOperandValue(mir->ssa_rep->uses[i]);
+            values->load_value_map.Put(index, value);
+            RangeCheckKey key = { array, index };
+            range_checked_.insert(key);
+          }
+        }
         // The MOVE_RESULT_OBJECT will be processed next and we'll return the value name then.
       }
       // All args escaped (if references).
@@ -475,17 +1407,12 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
       if ((mir->optimization_flags & MIR_INLINED) == 0) {
-        // Use mir->offset as modifier; without elaborate inlining, it will be unique.
-        global_memory_version_ = LookupValue(kInvokeMemoryVersionBumpOp, 0u, 0u, mir->offset);
         // Make ref args aliasing.
         for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
           uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]);
           non_aliasing_refs_.erase(reg);
         }
-        // All fields of escaped references need to be treated as potentially modified.
-        non_aliasing_ifields_.clear();
-        // Array elements may also have been modified via escaped array refs.
-        escaped_array_refs_.clear();
+        HandleInvokeOrClInit(mir);
       }
       break;
 
@@ -502,12 +1429,13 @@
     case Instruction::NEW_ARRAY:
       // 1 result, treat as unique each time, use result s_reg - will be unique.
       res = MarkNonAliasingNonNull(mir);
+      SetOperandValue(mir->ssa_rep->defs[0], res);
       break;
     case Instruction::CONST_STRING:
     case Instruction::CONST_STRING_JUMBO:
       // These strings are internalized, so assign value based on the string pool index.
-      res = LookupValue(Instruction::CONST_STRING, Low16Bits(mir->dalvikInsn.vB),
-                        High16Bits(mir->dalvikInsn.vB), 0);
+      res = gvn_->LookupValue(Instruction::CONST_STRING, Low16Bits(mir->dalvikInsn.vB),
+                              High16Bits(mir->dalvikInsn.vB), 0);
       SetOperandValue(mir->ssa_rep->defs[0], res);
       null_checked_.insert(res);  // May already be there.
       // NOTE: Hacking the contents of an internalized string via reflection is possible
@@ -523,10 +1451,7 @@
       break;
 
     case kMirOpPhi:
-      /*
-       * Because we'll only see phi nodes at the beginning of an extended basic block,
-       * we can ignore them.  Revisit if we shift to global value numbering.
-       */
+      res = HandlePhi(mir);
       break;
 
     case Instruction::MOVE:
@@ -552,27 +1477,27 @@
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
-      res = LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
-                        High16Bits(mir->dalvikInsn.vB), 0);
+      res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
+                              High16Bits(mir->dalvikInsn.vB), 0);
       SetOperandValue(mir->ssa_rep->defs[0], res);
       break;
 
     case Instruction::CONST_HIGH16:
-      res = LookupValue(Instruction::CONST, 0, mir->dalvikInsn.vB, 0);
+      res = gvn_->LookupValue(Instruction::CONST, 0, mir->dalvikInsn.vB, 0);
       SetOperandValue(mir->ssa_rep->defs[0], res);
       break;
 
     case Instruction::CONST_WIDE_16:
     case Instruction::CONST_WIDE_32: {
-        uint16_t low_res = LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
-                                       High16Bits(mir->dalvikInsn.vB >> 16), 1);
+        uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
+                                             High16Bits(mir->dalvikInsn.vB >> 16), 1);
         uint16_t high_res;
         if (mir->dalvikInsn.vB & 0x80000000) {
-          high_res = LookupValue(Instruction::CONST, 0xffff, 0xffff, 2);
+          high_res = gvn_->LookupValue(Instruction::CONST, 0xffff, 0xffff, 2);
         } else {
-          high_res = LookupValue(Instruction::CONST, 0, 0, 2);
+          high_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 2);
         }
-        res = LookupValue(Instruction::CONST, low_res, high_res, 3);
+        res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -580,24 +1505,30 @@
     case Instruction::CONST_WIDE: {
         uint32_t low_word = Low32Bits(mir->dalvikInsn.vB_wide);
         uint32_t high_word = High32Bits(mir->dalvikInsn.vB_wide);
-        uint16_t low_res = LookupValue(Instruction::CONST, Low16Bits(low_word),
-                                       High16Bits(low_word), 1);
-        uint16_t high_res = LookupValue(Instruction::CONST, Low16Bits(high_word),
-                                       High16Bits(high_word), 2);
-        res = LookupValue(Instruction::CONST, low_res, high_res, 3);
+        uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(low_word),
+                                             High16Bits(low_word), 1);
+        uint16_t high_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(high_word),
+                                              High16Bits(high_word), 2);
+        res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
 
     case Instruction::CONST_WIDE_HIGH16: {
-        uint16_t low_res = LookupValue(Instruction::CONST, 0, 0, 1);
-        uint16_t high_res = LookupValue(Instruction::CONST, 0, Low16Bits(mir->dalvikInsn.vB), 2);
-        res = LookupValue(Instruction::CONST, low_res, high_res, 3);
+        uint16_t low_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 1);
+        uint16_t high_res = gvn_->LookupValue(Instruction::CONST, 0,
+                                              Low16Bits(mir->dalvikInsn.vB), 2);
+        res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
 
-    case Instruction::ARRAY_LENGTH:
+    case Instruction::ARRAY_LENGTH: {
+        // Handle the null check.
+        uint16_t reg = GetOperandValue(mir->ssa_rep->uses[0]);
+        HandleNullCheck(mir, reg);
+      }
+      // Intentional fall-through.
     case Instruction::NEG_INT:
     case Instruction::NOT_INT:
     case Instruction::NEG_FLOAT:
@@ -608,7 +1539,7 @@
     case Instruction::FLOAT_TO_INT: {
         // res = op + 1 operand
         uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        res = LookupValue(opcode, operand1, kNoValue, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -619,7 +1550,7 @@
     case Instruction::DOUBLE_TO_INT: {
         // res = op + 1 wide operand
         uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        res = LookupValue(opcode, operand1, kNoValue, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -632,7 +1563,7 @@
     case Instruction::NEG_DOUBLE: {
         // wide res = op + 1 wide operand
         uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        res = LookupValue(opcode, operand1, kNoValue, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -643,7 +1574,7 @@
     case Instruction::INT_TO_LONG: {
         // wide res = op + 1 operand
         uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        res = LookupValue(opcode, operand1, kNoValue, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -654,7 +1585,7 @@
         // res = op + 2 wide operands
         uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
         uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -686,7 +1617,7 @@
         // res = op + 2 operands
         uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
         uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -720,7 +1651,7 @@
         // wide res = op + 2 wide operands
         uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
         uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -734,7 +1665,7 @@
         // wide res = op + 1 wide operand + 1 operand
         uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
         uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[2]);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValueWide(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -752,7 +1683,7 @@
         // res = op + 2 operands
         uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
         uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
@@ -778,8 +1709,8 @@
     case Instruction::USHR_INT_LIT8: {
         // Same as res = op + 2 operands, except use vC as operand 2
         uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = LookupValue(Instruction::CONST, mir->dalvikInsn.vC, 0, 0);
-        res = LookupValue(opcode, operand1, operand2, kNoValue);
+        uint16_t operand2 = gvn_->LookupValue(Instruction::CONST, mir->dalvikInsn.vC, 0, 0);
+        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
         SetOperandValue(mir->ssa_rep->defs[0], res);
       }
       break;
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 2a815be..190eab4 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -20,17 +20,57 @@
 #include <memory>
 
 #include "compiler_internals.h"
+#include "global_value_numbering.h"
 #include "utils/scoped_arena_allocator.h"
 #include "utils/scoped_arena_containers.h"
 
 namespace art {
 
 class DexFile;
-class MirFieldInfo;
+
+// Enable/disable tracking values stored in the FILLED_NEW_ARRAY result.
+static constexpr bool kLocalValueNumberingEnableFilledNewArrayTracking = true;
 
 class LocalValueNumbering {
+ private:
+  static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
+
  public:
-  LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator);
+  LocalValueNumbering(GlobalValueNumbering* gvn, BasicBlockId id);
+
+  BasicBlockId Id() const {
+    return id_;
+  }
+
+  bool Equals(const LocalValueNumbering& other) const;
+
+  // Mark an s_reg as null-checked, e.g. a non-static method's "this".
+  void SetSRegNullChecked(uint16_t s_reg) {
+    uint16_t value_name = GetOperandValue(s_reg);
+    null_checked_.insert(value_name);
+  }
+
+  bool IsValueNullChecked(uint16_t value_name) const {
+    return null_checked_.find(value_name) != null_checked_.end();
+  }
+
+  bool IsSregValue(uint16_t s_reg, uint16_t value_name) const {
+    auto it = sreg_value_map_.find(s_reg);
+    if (it != sreg_value_map_.end()) {
+      return it->second == value_name;
+    } else {
+      return gvn_->HasValue(kNoValue, s_reg, kNoValue, kNoValue, value_name);
+    }
+  }
+
+  enum MergeType {
+    kNormalMerge,
+    kCatchMerge,
+    kReturnMerge,  // RETURN or PHI+RETURN. Merge only sreg maps.
+  };
+
+  void MergeOne(const LocalValueNumbering& other, MergeType merge_type);
+  void Merge(MergeType merge_type);  // Merge gvn_->merge_lvns_.
 
   uint16_t GetValueNumber(MIR* mir);
 
@@ -42,13 +82,9 @@
   // Allow delete-expression to destroy a LocalValueNumbering object without deallocation.
   static void operator delete(void* ptr) { UNUSED(ptr); }
 
-  // Checks that the value names didn't overflow.
-  bool Good() const {
-    return last_value_ < kNoValue;
-  }
-
  private:
-  static constexpr uint16_t kNoValue = 0xffffu;
+  // A set of value names.
+  typedef GlobalValueNumbering::ValueNameSet ValueNameSet;
 
   // Field types correspond to the ordering of GET/PUT instructions; this order is the same
   // for IGET, IPUT, SGET, SPUT, AGET and APUT:
@@ -61,27 +97,51 @@
   // op_SHORT   6
   static constexpr size_t kFieldTypeCount = 7;
 
-  // FieldReference represents a unique resolved field.
-  struct FieldReference {
-    const DexFile* dex_file;
-    uint16_t field_idx;
-  };
+  // Key is s_reg, value is value name.
+  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
 
-  struct FieldReferenceComparator {
-    bool operator()(const FieldReference& lhs, const FieldReference& rhs) const {
-      if (lhs.field_idx != rhs.field_idx) {
-        return lhs.field_idx < rhs.field_idx;
-      }
-      return lhs.dex_file < rhs.dex_file;
+  void SetOperandValueImpl(uint16_t s_reg, uint16_t value, SregValueMap* map) {
+    DCHECK_EQ(map->count(s_reg), 0u) << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file)
+        << " LVN id: " << id_ << ", s_reg: " << s_reg;
+    map->Put(s_reg, value);
+  }
+
+  uint16_t GetOperandValueImpl(int s_reg, const SregValueMap* map) const {
+    uint16_t res = kNoValue;
+    auto lb = map->find(s_reg);
+    if (lb != map->end()) {
+      res = lb->second;
+    } else {
+      // Using the original value; s_reg refers to an input reg.
+      res = gvn_->LookupValue(kNoValue, s_reg, kNoValue, kNoValue);
     }
+    return res;
+  }
+
+  void SetOperandValue(uint16_t s_reg, uint16_t value) {
+    SetOperandValueImpl(s_reg, value, &sreg_value_map_);
   };
 
-  // Maps field key to field id for resolved fields.
-  typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap;
+  uint16_t GetOperandValue(int s_reg) const {
+    return GetOperandValueImpl(s_reg, &sreg_value_map_);
+  };
+
+  void SetOperandValueWide(uint16_t s_reg, uint16_t value) {
+    SetOperandValueImpl(s_reg, value, &sreg_wide_value_map_);
+  };
+
+  uint16_t GetOperandValueWide(int s_reg) const {
+    return GetOperandValueImpl(s_reg, &sreg_wide_value_map_);
+  };
 
   struct RangeCheckKey {
     uint16_t array;
     uint16_t index;
+
+    // NOTE: Can't define this at namespace scope for a private struct.
+    bool operator==(const RangeCheckKey& other) const {
+      return array == other.array && index == other.index;
+    }
   };
 
   struct RangeCheckKeyComparator {
@@ -95,210 +155,233 @@
 
   typedef ScopedArenaSet<RangeCheckKey, RangeCheckKeyComparator> RangeCheckSet;
 
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> AliasingIFieldVersionMap;
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> NonAliasingArrayVersionMap;
+  // Maps instance field "location" (derived from base, field_id and type) to value name.
+  typedef ScopedArenaSafeMap<uint16_t, uint16_t> IFieldLocToValueMap;
 
-  struct NonAliasingIFieldKey {
-    uint16_t base;
-    uint16_t field_id;
+  // Maps static field id to value name.
+  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SFieldToValueMap;
+
+  struct EscapedIFieldClobberKey {
+    uint16_t base;      // Or array.
     uint16_t type;
+    uint16_t field_id;  // None (kNoValue) for arrays and unresolved instance field stores.
+
+    // NOTE: Can't define this at namespace scope for a private struct.
+    bool operator==(const EscapedIFieldClobberKey& other) const {
+      return base == other.base && type == other.type && field_id == other.field_id;
+    }
   };
 
-  struct NonAliasingIFieldKeyComparator {
-    bool operator()(const NonAliasingIFieldKey& lhs, const NonAliasingIFieldKey& rhs) const {
-      // Compare the type first. This allows iterating across all the entries for a certain type
-      // as needed when we need to purge them for an unresolved field IPUT.
+  struct EscapedIFieldClobberKeyComparator {
+    bool operator()(const EscapedIFieldClobberKey& lhs, const EscapedIFieldClobberKey& rhs) const {
+      // Compare base first. This makes sequential iteration respect the order of base.
+      if (lhs.base != rhs.base) {
+        return lhs.base < rhs.base;
+      }
+      // Compare type second. This makes the type-clobber entries (field_id == kNoValue) sort
+      // last for a given base and type, and makes it easy to prune unnecessary entries when
+      // merging escaped_ifield_clobber_set_ from multiple LVNs.
       if (lhs.type != rhs.type) {
         return lhs.type < rhs.type;
       }
-      // Compare the field second. This allows iterating across all the entries for a certain
-      // field as needed when we need to purge them for an aliasing field IPUT.
-      if (lhs.field_id != rhs.field_id) {
-        return lhs.field_id < rhs.field_id;
-      }
-      // Compare the base last.
-      return lhs.base < rhs.base;
+      return lhs.field_id < rhs.field_id;
     }
   };
 
-  // Set of instance fields still holding non-aliased values after the base has been stored.
-  typedef ScopedArenaSet<NonAliasingIFieldKey, NonAliasingIFieldKeyComparator> NonAliasingFieldSet;
+  typedef ScopedArenaSet<EscapedIFieldClobberKey, EscapedIFieldClobberKeyComparator>
+      EscapedIFieldClobberSet;
 
-  struct EscapedArrayKey {
+  struct EscapedArrayClobberKey {
     uint16_t base;
     uint16_t type;
+
+    // NOTE: Can't define this at namespace scope for a private struct.
+    bool operator==(const EscapedArrayClobberKey& other) const {
+      return base == other.base && type == other.type;
+    }
   };
 
-  struct EscapedArrayKeyComparator {
-    bool operator()(const EscapedArrayKey& lhs, const EscapedArrayKey& rhs) const {
-      // Compare the type first. This allows iterating across all the entries for a certain type
-      // as needed when we need to purge them for an unresolved field APUT.
-      if (lhs.type != rhs.type) {
-        return lhs.type < rhs.type;
+  struct EscapedArrayClobberKeyComparator {
+    bool operator()(const EscapedArrayClobberKey& lhs, const EscapedArrayClobberKey& rhs) const {
+      // Compare base first. This makes sequential iteration respect the order of base.
+      if (lhs.base != rhs.base) {
+        return lhs.base < rhs.base;
       }
-      // Compare the base last.
-      return lhs.base < rhs.base;
+      return lhs.type < rhs.type;
     }
   };
 
-  // Set of previously non-aliasing array refs that escaped.
-  typedef ScopedArenaSet<EscapedArrayKey, EscapedArrayKeyComparator> EscapedArraySet;
+  // Clobber set for previously non-aliasing array refs that escaped.
+  typedef ScopedArenaSet<EscapedArrayClobberKey, EscapedArrayClobberKeyComparator>
+      EscapedArrayClobberSet;
 
-  // Key is s_reg, value is value name.
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
-  // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name.
-  typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap;
-  // Key represents a memory address, value is generation.
-  // A set of value names.
-  typedef ScopedArenaSet<uint16_t> ValueNameSet;
-
-  static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
-    return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 |
-            static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier));
-  };
-
-  static uint16_t ExtractOp(uint64_t key) {
-    return static_cast<uint16_t>(key >> 48);
-  }
-
-  static uint16_t ExtractOperand1(uint64_t key) {
-    return static_cast<uint16_t>(key >> 32);
-  }
-
-  static uint16_t ExtractOperand2(uint64_t key) {
-    return static_cast<uint16_t>(key >> 16);
-  }
-
-  static uint16_t ExtractModifier(uint64_t key) {
-    return static_cast<uint16_t>(key);
-  }
-
-  static bool EqualOpAndOperand1(uint64_t key1, uint64_t key2) {
-    return static_cast<uint32_t>(key1 >> 32) == static_cast<uint32_t>(key2 >> 32);
-  }
-
-  uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
-    uint16_t res;
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    ValueMap::iterator it = value_map_.find(key);
-    if (it != value_map_.end()) {
-      res = it->second;
-    } else {
-      ++last_value_;
-      res = last_value_;
-      value_map_.Put(key, res);
+  // Known location values for an aliasing set. The set can be tied to one of:
+  //   1. Instance field. The locations are aliasing references used to access the field.
+  //   2. Non-aliasing array reference. The locations are indexes into the array.
+  //   3. Aliasing array type. The locations are (reference, index) pair ids assigned by GVN.
+  // In each case we keep track of the last stored value, if any, and the set of locations
+  // where it was stored. We also keep track of all values known for the current write state
+  // (load_value_map); a value can be known either because it has been loaded since the last
+  // store, or because the location held the last_stored_value before that store and thus
+  // could not have changed as a result.
+  struct AliasingValues {
+    explicit AliasingValues(ScopedArenaAllocator* allocator)
+        : memory_version_before_stores(kNoValue),
+          last_stored_value(kNoValue),
+          store_loc_set(std::less<uint16_t>(), allocator->Adapter()),
+          last_load_memory_version(kNoValue),
+          load_value_map(std::less<uint16_t>(), allocator->Adapter()) {
     }
-    return res;
-  };
 
-  void StoreValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier,
-                  uint16_t value) {
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    value_map_.Overwrite(key, value);
-  }
+    uint16_t memory_version_before_stores;  // kNoValue if start version for the field.
+    uint16_t last_stored_value;             // Last stored value name, kNoValue if none.
+    ValueNameSet store_loc_set;             // Where was last_stored_value stored.
 
-  bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier,
-                uint16_t value) const {
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    ValueMap::const_iterator it = value_map_.find(key);
-    return (it != value_map_.end() && it->second == value);
-  };
+    // Maps locations (other than those in store_loc_set) to the value names currently known
+    // to be there. On a write, anything that differs from the written value is removed, as
+    // it may have been overwritten.
+    uint16_t last_load_memory_version;    // kNoValue if not known.
+    ScopedArenaSafeMap<uint16_t, uint16_t> load_value_map;
 
-  bool ValueExists(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const {
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    ValueMap::const_iterator it = value_map_.find(key);
-    return (it != value_map_.end());
-  };
-
-  void SetOperandValue(uint16_t s_reg, uint16_t value) {
-    SregValueMap::iterator it = sreg_value_map_.find(s_reg);
-    if (it != sreg_value_map_.end()) {
-      DCHECK_EQ(it->second, value);
-    } else {
-      sreg_value_map_.Put(s_reg, value);
+    // NOTE: Can't define this at namespace scope for a private struct.
+    bool operator==(const AliasingValues& other) const {
+      return memory_version_before_stores == other.memory_version_before_stores &&
+          last_load_memory_version == other.last_load_memory_version &&
+          last_stored_value == other.last_stored_value &&
+          store_loc_set == other.store_loc_set &&
+          load_value_map == other.load_value_map;
     }
   };
 
-  uint16_t GetOperandValue(int s_reg) {
-    uint16_t res = kNoValue;
-    SregValueMap::iterator it = sreg_value_map_.find(s_reg);
-    if (it != sreg_value_map_.end()) {
-      res = it->second;
-    } else {
-      // First use
-      res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue);
-      sreg_value_map_.Put(s_reg, res);
-    }
-    return res;
-  };
+  // Maps instance field id to AliasingValues, locations are object refs.
+  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> AliasingIFieldValuesMap;
 
-  void SetOperandValueWide(uint16_t s_reg, uint16_t value) {
-    SregValueMap::iterator it = sreg_wide_value_map_.find(s_reg);
-    if (it != sreg_wide_value_map_.end()) {
-      DCHECK_EQ(it->second, value);
-    } else {
-      sreg_wide_value_map_.Put(s_reg, value);
-    }
-  };
+  // Maps non-aliasing array reference to AliasingValues, locations are array indexes.
+  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> NonAliasingArrayValuesMap;
 
-  uint16_t GetOperandValueWide(int s_reg) {
-    uint16_t res = kNoValue;
-    SregValueMap::iterator it = sreg_wide_value_map_.find(s_reg);
-    if (it != sreg_wide_value_map_.end()) {
-      res = it->second;
-    } else {
-      // First use
-      res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue);
-      sreg_wide_value_map_.Put(s_reg, res);
-    }
-    return res;
-  };
+  // Maps aliasing array type to AliasingValues, locations are (array, index) pair ids.
+  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> AliasingArrayValuesMap;
 
-  uint16_t GetFieldId(const MirFieldInfo& field_info);
+  // Helper classes defining versions for updating and merging the AliasingValues maps above.
+  class AliasingIFieldVersions;
+  class NonAliasingArrayVersions;
+  class AliasingArrayVersions;
+
+  template <typename Map>
+  AliasingValues* GetAliasingValues(Map* map, const typename Map::key_type& key);
+
+  template <typename Versions, typename KeyType>
+  void UpdateAliasingValuesLoadVersion(const KeyType& key, AliasingValues* values);
+
+  template <typename Versions, typename Map>
+  static uint16_t AliasingValuesMergeGet(GlobalValueNumbering* gvn,
+                                         const LocalValueNumbering* lvn,
+                                         Map* map, const typename Map::key_type& key,
+                                         uint16_t location);
+
+  template <typename Versions, typename Map>
+  uint16_t HandleAliasingValuesGet(Map* map, const typename Map::key_type& key,
+                                   uint16_t location);
+
+  template <typename Versions, typename Map>
+  bool HandleAliasingValuesPut(Map* map, const typename Map::key_type& key,
+                               uint16_t location, uint16_t value);
+
   uint16_t MarkNonAliasingNonNull(MIR* mir);
-  bool IsNonAliasing(uint16_t reg);
-  bool IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type);
-  bool IsNonAliasingArray(uint16_t reg, uint16_t type);
+  bool IsNonAliasing(uint16_t reg) const;
+  bool IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type) const;
+  bool IsNonAliasingArray(uint16_t reg, uint16_t type) const;
   void HandleNullCheck(MIR* mir, uint16_t reg);
   void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index);
   void HandlePutObject(MIR* mir);
   void HandleEscapingRef(uint16_t base);
+  uint16_t HandlePhi(MIR* mir);
   uint16_t HandleAGet(MIR* mir, uint16_t opcode);
   void HandleAPut(MIR* mir, uint16_t opcode);
   uint16_t HandleIGet(MIR* mir, uint16_t opcode);
   void HandleIPut(MIR* mir, uint16_t opcode);
   uint16_t HandleSGet(MIR* mir, uint16_t opcode);
   void HandleSPut(MIR* mir, uint16_t opcode);
+  void RemoveSFieldsForType(uint16_t type);
+  void HandleInvokeOrClInit(MIR* mir);
 
-  CompilationUnit* const cu_;
+  bool SameMemoryVersion(const LocalValueNumbering& other) const;
 
-  // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good().
-  // We usually don't check Good() until the end of LVN unless we're about to modify code.
-  uint32_t last_value_;
+  uint16_t NewMemoryVersion(uint16_t* new_version);
+  void MergeMemoryVersions(bool clobbered_catch);
+
+  void PruneNonAliasingRefsForCatch();
+
+  template <typename Set, Set LocalValueNumbering::* set_ptr>
+  void IntersectSets();
+
+  // Intersect maps as sets. The value type must be equality-comparable.
+  template <typename Map, Map LocalValueNumbering::* map_ptr>
+  void IntersectMaps();
+
+  // Intersect maps as sets. The value type must be equality-comparable.
+  template <typename Map>
+  static void InPlaceIntersectMaps(Map* work_map, const Map& other_map);
+
+  template <typename Set, Set LocalValueNumbering::*set_ptr, void (LocalValueNumbering::*MergeFn)(
+      const typename Set::value_type& entry, typename Set::iterator hint)>
+  void MergeSets();
+
+  void IntersectAliasingValueLocations(AliasingValues* work_values, const AliasingValues* values);
+
+  void MergeEscapedRefs(const ValueNameSet::value_type& entry, ValueNameSet::iterator hint);
+  void MergeEscapedIFieldTypeClobberSets(const EscapedIFieldClobberSet::value_type& entry,
+                                         EscapedIFieldClobberSet::iterator hint);
+  void MergeEscapedIFieldClobberSets(const EscapedIFieldClobberSet::value_type& entry,
+                                     EscapedIFieldClobberSet::iterator hint);
+  void MergeEscapedArrayClobberSets(const EscapedArrayClobberSet::value_type& entry,
+                                    EscapedArrayClobberSet::iterator hint);
+  void MergeNullChecked(const ValueNameSet::value_type& entry, ValueNameSet::iterator hint);
+  void MergeSFieldValues(const SFieldToValueMap::value_type& entry,
+                         SFieldToValueMap::iterator hint);
+  void MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
+                                    IFieldLocToValueMap::iterator hint);
+
+  template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
+  void MergeAliasingValues(const typename Map::value_type& entry, typename Map::iterator hint);
+
+  GlobalValueNumbering* gvn_;
+
+  // We're using the block id as a 16-bit operand value for some lookups.
+  COMPILE_ASSERT(sizeof(BasicBlockId) == sizeof(uint16_t), BasicBlockId_must_be_16_bit);
+  BasicBlockId id_;
 
   SregValueMap sreg_value_map_;
   SregValueMap sreg_wide_value_map_;
-  ValueMap value_map_;
+
+  SFieldToValueMap sfield_value_map_;
+  IFieldLocToValueMap non_aliasing_ifield_value_map_;
+  AliasingIFieldValuesMap aliasing_ifield_value_map_;
+  NonAliasingArrayValuesMap non_aliasing_array_value_map_;
+  AliasingArrayValuesMap aliasing_array_value_map_;
 
   // Data for dealing with memory clobbering and store/load aliasing.
   uint16_t global_memory_version_;
   uint16_t unresolved_sfield_version_[kFieldTypeCount];
   uint16_t unresolved_ifield_version_[kFieldTypeCount];
-  uint16_t aliasing_array_version_[kFieldTypeCount];
-  AliasingIFieldVersionMap aliasing_ifield_version_map_;
-  NonAliasingArrayVersionMap non_aliasing_array_version_map_;
-  FieldIndexMap field_index_map_;
   // Value names of references to objects that cannot be reached through a different value name.
   ValueNameSet non_aliasing_refs_;
-  // Instance fields still holding non-aliased values after the base has escaped.
-  NonAliasingFieldSet non_aliasing_ifields_;
-  // Previously non-aliasing array refs that escaped but can still be used for non-aliasing AGET.
-  EscapedArraySet escaped_array_refs_;
+  // Previously non-aliasing refs that escaped but can still be used for non-aliasing AGET/IGET.
+  ValueNameSet escaped_refs_;
+  // Blacklists for cases where escaped_refs_ can't be used.
+  EscapedIFieldClobberSet escaped_ifield_clobber_set_;
+  EscapedArrayClobberSet escaped_array_clobber_set_;
 
   // Range check and null check elimination.
   RangeCheckSet range_checked_;
   ValueNameSet null_checked_;
 
+  // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack.
+  ScopedArenaVector<BasicBlockId> merge_names_;
+  // Map to identify when different locations merge the same values.
+  ScopedArenaSafeMap<ScopedArenaVector<BasicBlockId>, uint16_t> merge_map_;
+  // New memory version for merge, kNoValue if all memory versions matched.
+  uint16_t merge_new_memory_version_;
+
   DISALLOW_COPY_AND_ASSIGN(LocalValueNumbering);
 };
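To make the AliasingValues comment above more concrete, here is a hedged, self-contained sketch of the bookkeeping it describes for a single aliasing set. This is not the ART implementation and all names are illustrative:

#include <cstdint>
#include <map>
#include <set>

struct AliasingValuesSketch {
  static constexpr uint16_t kNoValue = 0xffffu;

  uint16_t last_stored_value = kNoValue;
  std::set<uint16_t> store_loc_set;             // Locations known to hold last_stored_value.
  std::map<uint16_t, uint16_t> load_value_map;  // Other locations -> known value name.

  // A load can reuse a known value name; otherwise the caller assigns a fresh one.
  bool TryGetKnownValue(uint16_t loc, uint16_t* value_name) const {
    if (store_loc_set.count(loc) != 0u) {
      *value_name = last_stored_value;
      return true;
    }
    auto it = load_value_map.find(loc);
    if (it == load_value_map.end()) {
      return false;
    }
    *value_name = it->second;
    return true;
  }

  // A store of value_name to loc. Any location known to hold a different value may have been
  // overwritten through aliasing, so forget it; locations already known to hold the same value
  // cannot have changed and survive.
  void RecordStore(uint16_t loc, uint16_t value_name) {
    if (value_name != last_stored_value) {
      std::set<uint16_t> surviving;
      for (const auto& entry : load_value_map) {
        if (entry.second == value_name) {
          surviving.insert(entry.first);
        }
      }
      store_loc_set.swap(surviving);  // Old store locations held a different value; drop them.
      load_value_map.clear();
      last_stored_value = value_name;
    } else {
      for (auto it = load_value_map.begin(); it != load_value_map.end(); ) {
        if (it->second == value_name) {
          ++it;
        } else {
          it = load_value_map.erase(it);
        }
      }
    }
    store_loc_set.insert(loc);
  }
};

The "same value survives" branch is what the StoringSameValueKeepsMemoryVersion test later in this change exercises.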
 
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index efc4fc8..b3eae42 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -14,10 +14,9 @@
  * limitations under the License.
  */
 
-#include <vector>
-
-#include "local_value_numbering.h"
 #include "compiler_internals.h"
+#include "global_value_numbering.h"
+#include "local_value_numbering.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -113,11 +112,13 @@
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
       MirSFieldLoweringInfo field_info(def->field_idx);
+      // Mark even unresolved fields as initialized.
+      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
+          MirSFieldLoweringInfo::kFlagIsInitialized;
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
-            (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
       }
       cu_.mir_graph->sfield_lowering_infos_.Insert(field_info);
     }
@@ -168,12 +169,18 @@
     DoPrepareMIRs(defs, count);
   }
 
+  void MakeSFieldUninitialized(uint32_t sfield_index) {
+    CHECK_LT(sfield_index, cu_.mir_graph->sfield_lowering_infos_.Size());
+    cu_.mir_graph->sfield_lowering_infos_.GetRawStorage()[sfield_index].flags_ &=
+        ~MirSFieldLoweringInfo::kFlagIsInitialized;
+  }
+
   void PerformLVN() {
     value_names_.resize(mir_count_);
     for (size_t i = 0; i != mir_count_; ++i) {
       value_names_[i] =  lvn_->GetValueNumber(&mirs_[i]);
     }
-    EXPECT_TRUE(lvn_->Good());
+    EXPECT_TRUE(gvn_->Good());
   }
 
   LocalValueNumberingTest()
@@ -181,11 +188,16 @@
         cu_(&pool_),
         mir_count_(0u),
         mirs_(nullptr),
+        ssa_reps_(),
         allocator_(),
-        lvn_() {
+        gvn_(),
+        lvn_(),
+        value_names_() {
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    lvn_.reset(new (allocator_.get()) LocalValueNumbering(&cu_, allocator_.get()));
+    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get()));
+    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u));
+    gvn_->AllowModifications();
   }
 
   ArenaPool pool_;
@@ -193,9 +205,10 @@
   size_t mir_count_;
   MIR* mirs_;
   std::vector<SSARepresentation> ssa_reps_;
-  std::vector<uint16_t> value_names_;
   std::unique_ptr<ScopedArenaAllocator> allocator_;
+  std::unique_ptr<GlobalValueNumbering> gvn_;
   std::unique_ptr<LocalValueNumbering> lvn_;
+  std::vector<uint16_t> value_names_;
 };
 
 TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) {
@@ -240,11 +253,10 @@
   ASSERT_EQ(value_names_.size(), 5u);
   EXPECT_NE(value_names_[0], value_names_[2]);
   EXPECT_NE(value_names_[3], value_names_[4]);
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[3].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[4].optimization_flags, 0u);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    EXPECT_EQ((i == 2u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, UniquePreserve1) {
@@ -263,9 +275,10 @@
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 4u);
   EXPECT_EQ(value_names_[1], value_names_[3]);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[2].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    EXPECT_EQ((i == 1u || i == 3u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, UniquePreserve2) {
@@ -284,9 +297,10 @@
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 4u);
   EXPECT_EQ(value_names_[1], value_names_[3]);
-  EXPECT_EQ(mirs_[1].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    EXPECT_EQ((i == 2u || i == 3u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) {
@@ -308,9 +322,10 @@
   ASSERT_EQ(value_names_.size(), 6u);
   EXPECT_EQ(value_names_[1], value_names_[3]);
   EXPECT_NE(value_names_[1], value_names_[5]);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    EXPECT_EQ((i == 1u || i == 3u || i == 4u || i == 5u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, Volatile) {
@@ -331,10 +346,10 @@
   ASSERT_EQ(value_names_.size(), 4u);
   EXPECT_NE(value_names_[0], value_names_[2]);  // Volatile has always different value name.
   EXPECT_NE(value_names_[1], value_names_[3]);  // Used different base because of volatile.
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[3].optimization_flags, 0u);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    EXPECT_EQ((i == 2u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, UnresolvedIField) {
@@ -352,29 +367,36 @@
       DEF_IGET(Instruction::IGET, 5u, 20u, 0u),             // Resolved field #1, unique object.
       DEF_IGET(Instruction::IGET, 6u, 21u, 0u),             // Resolved field #1.
       DEF_IGET_WIDE(Instruction::IGET_WIDE, 7u, 21u, 1u),   // Resolved field #2.
-      DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u),             // IPUT clobbers field #1 (#2 if wide).
+      DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u),             // IPUT clobbers field #1 (#2 is wide).
       DEF_IGET(Instruction::IGET, 9u, 20u, 0u),             // Resolved field #1, unique object.
       DEF_IGET(Instruction::IGET, 10u, 21u, 0u),            // Resolved field #1, new value name.
       DEF_IGET_WIDE(Instruction::IGET_WIDE, 11u, 21u, 1u),  // Resolved field #2.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 12u, 20u, 1u),  // Resolved field #2, unique object.
+      DEF_IPUT(Instruction::IPUT, 13u, 20u, 2u),            // IPUT clobbers field #1 (#2 is wide).
+      DEF_IGET(Instruction::IGET, 14u, 20u, 0u),            // Resolved field #1, unique object.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 20u, 1u),  // Resolved field #2, unique object.
   };
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
   PerformLVN();
-  ASSERT_EQ(value_names_.size(), 12u);
+  ASSERT_EQ(value_names_.size(), 16u);
   EXPECT_EQ(value_names_[1], value_names_[5]);
   EXPECT_EQ(value_names_[2], value_names_[6]);
   EXPECT_EQ(value_names_[3], value_names_[7]);
   EXPECT_EQ(value_names_[1], value_names_[9]);
   EXPECT_NE(value_names_[2], value_names_[10]);  // This aliased with unresolved IPUT.
   EXPECT_EQ(value_names_[3], value_names_[11]);
+  EXPECT_EQ(value_names_[12], value_names_[15]);
+  EXPECT_NE(value_names_[1], value_names_[14]);  // This aliased with unresolved IPUT.
   EXPECT_EQ(mirs_[0].optimization_flags, 0u);
   EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
   EXPECT_EQ(mirs_[2].optimization_flags, 0u);
   EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
   EXPECT_EQ(mirs_[4].optimization_flags, 0u);
   for (size_t i = 5u; i != mir_count_; ++i) {
-    EXPECT_EQ(mirs_[i].optimization_flags, MIR_IGNORE_NULL_CHECK);
+    EXPECT_EQ((i == 1u || i == 3u || i >= 5u) ? MIR_IGNORE_NULL_CHECK : 0,
+              mirs_[i].optimization_flags) << i;
   }
 }
 
@@ -404,10 +426,40 @@
   EXPECT_NE(value_names_[0], value_names_[6]);  // This aliased with unresolved IPUT.
   EXPECT_EQ(value_names_[1], value_names_[7]);
   for (size_t i = 0u; i != mir_count_; ++i) {
-    EXPECT_EQ(mirs_[i].optimization_flags, 0u) << i;
+    EXPECT_EQ(0, mirs_[i].optimization_flags) << i;
   }
 }
 
+TEST_F(LocalValueNumberingTest, UninitializedSField) {
+  static const IFieldDef ifields[] = {
+      { 1u, 1u, 1u, false },  // Resolved field #1.
+  };
+  static const SFieldDef sfields[] = {
+      { 1u, 1u, 1u, false },  // Resolved field #1.
+      { 2u, 1u, 2u, false },  // Resolved field #2; uninitialized.
+  };
+  static const MIRDef mirs[] = {
+      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 200u),
+      DEF_IGET(Instruction::IGET, 1u, 100u, 0u),
+      DEF_IGET(Instruction::IGET, 2u, 200u, 0u),
+      DEF_SGET(Instruction::SGET, 3u, 0u),
+      DEF_SGET(Instruction::SGET, 4u, 1u),            // Can call <clinit>().
+      DEF_IGET(Instruction::IGET, 5u, 100u, 0u),      // Differs from 1u.
+      DEF_IGET(Instruction::IGET, 6u, 200u, 0u),      // Same as 2u.
+      DEF_SGET(Instruction::SGET, 7u, 0u),            // Differs from 3u.
+  };
+
+  PrepareIFields(ifields);
+  PrepareSFields(sfields);
+  MakeSFieldUninitialized(1u);
+  PrepareMIRs(mirs);
+  PerformLVN();
+  ASSERT_EQ(value_names_.size(), 8u);
+  EXPECT_NE(value_names_[1], value_names_[5]);
+  EXPECT_EQ(value_names_[2], value_names_[6]);
+  EXPECT_NE(value_names_[3], value_names_[7]);
+}
+
 TEST_F(LocalValueNumberingTest, ConstString) {
   static const MIRDef mirs[] = {
       DEF_CONST_STRING(Instruction::CONST_STRING, 0u, 0u),
@@ -436,33 +488,39 @@
       { 3u, 1u, 3u, false },
   };
   static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_IPUT(Instruction::IPUT, 0u, 10u, 1u),
+      DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 201u),
+      DEF_IGET(Instruction::IGET, 0u, 100u, 0u),
+      DEF_IPUT(Instruction::IPUT, 0u, 100u, 1u),
+      DEF_IPUT(Instruction::IPUT, 0u, 101u, 1u),
+      DEF_APUT(Instruction::APUT, 0u, 200u, 300u),
+      DEF_APUT(Instruction::APUT, 0u, 200u, 301u),
+      DEF_APUT(Instruction::APUT, 0u, 201u, 300u),
+      DEF_APUT(Instruction::APUT, 0u, 201u, 301u),
       DEF_SPUT(Instruction::SPUT, 0u, 0u),
-      DEF_APUT(Instruction::APUT, 0u, 11u, 12u),
-      DEF_IGET(Instruction::IGET, 1u, 10u, 0u),
-      DEF_IGET(Instruction::IGET, 2u, 10u, 1u),
-      DEF_AGET(Instruction::AGET, 3u, 11u, 12u),
-      DEF_SGET(Instruction::SGET, 4u, 0u),
+      DEF_IGET(Instruction::IGET, 9u, 100u, 0u),
+      DEF_IGET(Instruction::IGET, 10u, 100u, 1u),
+      DEF_IGET(Instruction::IGET, 11u, 101u, 1u),
+      DEF_AGET(Instruction::AGET, 12u, 200u, 300u),
+      DEF_AGET(Instruction::AGET, 13u, 200u, 301u),
+      DEF_AGET(Instruction::AGET, 14u, 201u, 300u),
+      DEF_AGET(Instruction::AGET, 15u, 201u, 301u),
+      DEF_SGET(Instruction::SGET, 16u, 0u),
   };
 
   PrepareIFields(ifields);
   PrepareSFields(sfields);
   PrepareMIRs(mirs);
   PerformLVN();
-  ASSERT_EQ(value_names_.size(), 8u);
-  EXPECT_EQ(value_names_[4], value_names_[0]);
-  EXPECT_EQ(value_names_[5], value_names_[0]);
-  EXPECT_EQ(value_names_[6], value_names_[0]);
-  EXPECT_EQ(value_names_[7], value_names_[0]);
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[2].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[3].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[4].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[6].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK);
-  EXPECT_EQ(mirs_[7].optimization_flags, 0u);
+  ASSERT_EQ(value_names_.size(), 17u);
+  for (size_t i = 9; i != arraysize(mirs); ++i) {
+    EXPECT_EQ(value_names_[1], value_names_[i]) << i;
+  }
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    int expected_flags =
+        ((i == 2u || (i >= 5u && i <= 7u) || (i >= 9u && i <= 15u)) ? MIR_IGNORE_NULL_CHECK : 0) |
+        ((i >= 12u && i <= 15u) ? MIR_IGNORE_RANGE_CHECK : 0);
+    EXPECT_EQ(expected_flags, mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, UniqueArrayAliasing) {
@@ -477,10 +535,12 @@
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 4u);
   EXPECT_NE(value_names_[1], value_names_[3]);
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    int expected_flags =
+        ((i >= 1u) ? MIR_IGNORE_NULL_CHECK : 0) |
+        ((i == 3u) ? MIR_IGNORE_RANGE_CHECK : 0);
+    EXPECT_EQ(expected_flags, mirs_[i].optimization_flags) << i;
+  }
 }
 
 TEST_F(LocalValueNumberingTest, EscapingRefs) {
@@ -528,7 +588,7 @@
   EXPECT_NE(value_names_[13], value_names_[16]);  // New value.
   EXPECT_NE(value_names_[14], value_names_[17]);  // New value.
   for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = (i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0u;
+    int expected = (i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0;
     EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
   }
 }
@@ -569,32 +629,104 @@
 TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) {
   static const IFieldDef ifields[] = {
       { 1u, 1u, 1u, false },
+      { 2u, 1u, 2u, false },
+  };
+  static const SFieldDef sfields[] = {
+      { 2u, 1u, 2u, false },
   };
   static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_IGET(Instruction::IGET, 1u, 11u, 0u),
-      DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u),   // Store the same value.
-      DEF_IGET(Instruction::IGET, 3u, 10u, 0u),
-      DEF_AGET(Instruction::AGET, 4u, 12u, 40u),
-      DEF_AGET(Instruction::AGET, 5u, 13u, 40u),
-      DEF_APUT(Instruction::APUT, 5u, 13u, 40u),  // Store the same value.
-      DEF_AGET(Instruction::AGET, 7u, 12u, 40u),
+      DEF_IGET(Instruction::IGET, 0u, 30u, 0u),
+      DEF_IGET(Instruction::IGET, 1u, 31u, 0u),
+      DEF_IPUT(Instruction::IPUT, 1u, 31u, 0u),            // Store the same value.
+      DEF_IGET(Instruction::IGET, 3u, 30u, 0u),
+      DEF_AGET(Instruction::AGET, 4u, 32u, 40u),
+      DEF_AGET(Instruction::AGET, 5u, 33u, 40u),
+      DEF_APUT(Instruction::APUT, 5u, 33u, 40u),           // Store the same value.
+      DEF_AGET(Instruction::AGET, 7u, 32u, 40u),
+      DEF_SGET(Instruction::SGET, 8u, 0u),
+      DEF_SPUT(Instruction::SPUT, 8u, 0u),                 // Store the same value.
+      DEF_SGET(Instruction::SGET, 10u, 0u),
+      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 50u),      // Test with unique references.
+      { Instruction::FILLED_NEW_ARRAY, 0, 0u, 2, { 12u, 13u }, 0, { } },
+      DEF_UNIQUE_REF(Instruction::MOVE_RESULT_OBJECT, 51u),
+      DEF_IGET(Instruction::IGET, 14u, 50u, 0u),
+      DEF_IGET(Instruction::IGET, 15u, 50u, 1u),
+      DEF_IPUT(Instruction::IPUT, 15u, 50u, 1u),           // Store the same value.
+      DEF_IGET(Instruction::IGET, 17u, 50u, 0u),
+      DEF_AGET(Instruction::AGET, 18u, 51u, 40u),
+      DEF_AGET(Instruction::AGET, 19u, 51u, 41u),
+      DEF_APUT(Instruction::APUT, 19u, 51u, 41u),          // Store the same value.
+      DEF_AGET(Instruction::AGET, 21u, 51u, 40u),
   };
 
   PrepareIFields(ifields);
+  PrepareSFields(sfields);
   PrepareMIRs(mirs);
   PerformLVN();
-  ASSERT_EQ(value_names_.size(), 8u);
+  ASSERT_EQ(value_names_.size(), 22u);
   EXPECT_NE(value_names_[0], value_names_[1]);
   EXPECT_EQ(value_names_[0], value_names_[3]);
   EXPECT_NE(value_names_[4], value_names_[5]);
   EXPECT_EQ(value_names_[4], value_names_[7]);
+  EXPECT_EQ(value_names_[8], value_names_[10]);
+  EXPECT_NE(value_names_[14], value_names_[15]);
+  EXPECT_EQ(value_names_[14], value_names_[17]);
+  EXPECT_NE(value_names_[18], value_names_[19]);
+  EXPECT_EQ(value_names_[18], value_names_[21]);
   for (size_t i = 0u; i != mir_count_; ++i) {
     int expected =
-        ((i == 2u || i == 3u || i == 6u || i == 7u) ? MIR_IGNORE_NULL_CHECK : 0u) |
-        ((i == 6u || i == 7u) ? MIR_IGNORE_RANGE_CHECK : 0u);
+        ((i == 2u || i == 3u || i == 6u || i == 7u || (i >= 14u)) ? MIR_IGNORE_NULL_CHECK : 0u) |
+        ((i == 6u || i == 7u || i >= 20u) ? MIR_IGNORE_RANGE_CHECK : 0u);
     EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
   }
 }
 
+TEST_F(LocalValueNumberingTest, FilledNewArrayTracking) {
+  if (!kLocalValueNumberingEnableFilledNewArrayTracking) {
+    // Feature disabled.
+    return;
+  }
+  static const MIRDef mirs[] = {
+      DEF_CONST(Instruction::CONST, 0u, 100),
+      DEF_CONST(Instruction::CONST, 1u, 200),
+      { Instruction::FILLED_NEW_ARRAY, 0, 0u, 2, { 0u, 1u }, 0, { } },
+      DEF_UNIQUE_REF(Instruction::MOVE_RESULT_OBJECT, 10u),
+      DEF_CONST(Instruction::CONST, 20u, 0),
+      DEF_CONST(Instruction::CONST, 21u, 1),
+      DEF_AGET(Instruction::AGET, 6u, 10u, 20u),
+      DEF_AGET(Instruction::AGET, 7u, 10u, 21u),
+  };
+
+  PrepareMIRs(mirs);
+  PerformLVN();
+  ASSERT_EQ(value_names_.size(), 8u);
+  EXPECT_EQ(value_names_[0], value_names_[6]);
+  EXPECT_EQ(value_names_[1], value_names_[7]);
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = (i == 6u || i == 7u) ? (MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK) : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
+TEST_F(LocalValueNumberingTest, ClInitOnSget) {
+  static const SFieldDef sfields[] = {
+      { 0u, 1u, 0u, false },
+      { 1u, 2u, 1u, false },
+  };
+  static const MIRDef mirs[] = {
+      DEF_SGET(Instruction::SGET_OBJECT, 0u, 0u),
+      DEF_AGET(Instruction::AGET, 1u, 0u, 100u),
+      DEF_SGET(Instruction::SGET_OBJECT, 2u, 1u),
+      DEF_SGET(Instruction::SGET_OBJECT, 3u, 0u),
+      DEF_AGET(Instruction::AGET, 4u, 3u, 100u),
+  };
+
+  PrepareSFields(sfields);
+  MakeSFieldUninitialized(1u);
+  PrepareMIRs(mirs);
+  PerformLVN();
+  ASSERT_EQ(value_names_.size(), 5u);
+  EXPECT_NE(value_names_[0], value_names_[3]);
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h
index cad516d..9745c41 100644
--- a/compiler/dex/mir_field_info.h
+++ b/compiler/dex/mir_field_info.h
@@ -137,6 +137,7 @@
   // The member offset of the field, 0u if unresolved.
   MemberOffset field_offset_;
 
+  friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
 };
 
@@ -204,6 +205,7 @@
   uint32_t storage_index_;
 
   friend class ClassInitCheckEliminationTest;
+  friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
 };
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 76c9815..1fbf450 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -23,6 +23,7 @@
 #include "compiler_internals.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
+#include "dex/global_value_numbering.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "leb128.h"
@@ -89,6 +90,7 @@
       temp_insn_data_(nullptr),
       temp_bit_vector_size_(0u),
       temp_bit_vector_(nullptr),
+      temp_gvn_(),
       block_list_(arena, 100, kGrowableArrayBlockList),
       try_block_addr_(NULL),
       entry_block_(NULL),
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index f812165..6ee48a4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -32,6 +32,8 @@
 
 namespace art {
 
+class GlobalValueNumbering;
+
 enum InstructionAnalysisAttributePos {
   kUninterestingOp = 0,
   kArithmeticOp,
@@ -899,6 +901,9 @@
   bool EliminateClassInitChecksGate();
   bool EliminateClassInitChecks(BasicBlock* bb);
   void EliminateClassInitChecksEnd();
+  bool ApplyGlobalValueNumberingGate();
+  bool ApplyGlobalValueNumbering(BasicBlock* bb);
+  void ApplyGlobalValueNumberingEnd();
   /*
    * Type inference handling helpers.  Because Dalvik's bytecode is not fully typed,
    * we have to do some work to figure out the sreg type.  For some operations it is
@@ -951,10 +956,10 @@
   void ComputeTopologicalSortOrder();
   BasicBlock* CreateNewBB(BBType block_type);
 
-  bool InlineCallsGate();
-  void InlineCallsStart();
-  void InlineCalls(BasicBlock* bb);
-  void InlineCallsEnd();
+  bool InlineSpecialMethodsGate();
+  void InlineSpecialMethodsStart();
+  void InlineSpecialMethods(BasicBlock* bb);
+  void InlineSpecialMethodsEnd();
 
   /**
    * @brief Perform the initial preparation for the Method Uses.
@@ -1123,6 +1128,7 @@
   uint16_t* temp_insn_data_;
   uint32_t temp_bit_vector_size_;
   ArenaBitVector* temp_bit_vector_;
+  std::unique_ptr<GlobalValueNumbering> temp_gvn_;
   static const int kInvalidEntry = -1;
   GrowableArray<BasicBlock*> block_list_;
   ArenaBitVector* try_block_addr_;
@@ -1159,6 +1165,7 @@
   GrowableArray<BasicBlock*> gen_suspend_test_list_;  // List of blocks containing suspend tests
 
   friend class ClassInitCheckEliminationTest;
+  friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
 };
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 4b2bc4a..8a474f0 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -15,8 +15,10 @@
  */
 
 #include "compiler_internals.h"
+#include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/global_value_numbering.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "utils/scoped_arena_containers.h"
@@ -318,12 +320,15 @@
   if (bb->block_type == kDead) {
     return true;
   }
-  bool use_lvn = bb->use_lvn;
+  // Don't do a separate LVN if we did the GVN.
+  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0;
   std::unique_ptr<ScopedArenaAllocator> allocator;
+  std::unique_ptr<GlobalValueNumbering> global_valnum;
   std::unique_ptr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
     allocator.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-    local_valnum.reset(new (allocator.get()) LocalValueNumbering(cu_, allocator.get()));
+    global_valnum.reset(new (allocator.get()) GlobalValueNumbering(cu_, allocator.get()));
+    local_valnum.reset(new (allocator.get()) LocalValueNumbering(global_valnum.get(), bb->id));
   }
   while (bb != NULL) {
     for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
@@ -558,7 +563,7 @@
     }
     bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
-  if (use_lvn && UNLIKELY(!local_valnum->Good())) {
+  if (use_lvn && UNLIKELY(!global_valnum->Good())) {
     LOG(WARNING) << "LVN overflow in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
 
@@ -998,11 +1003,14 @@
             mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
           const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir);
           uint16_t index = 0xffffu;
-          if (field_info.IsResolved() && !field_info.IsInitialized()) {
+          if (!field_info.IsInitialized()) {
             DCHECK_LT(class_to_index_map.size(), 0xffffu);
             MapEntry entry = {
-                field_info.DeclaringDexFile(),
-                field_info.DeclaringClassIndex(),
+                // Treat unresolved fields as if each had its own class.
+                field_info.IsResolved() ? field_info.DeclaringDexFile()
+                                        : nullptr,
+                field_info.IsResolved() ? field_info.DeclaringClassIndex()
+                                        : field_info.FieldIndex(),
                 static_cast<uint16_t>(class_to_index_map.size())
             };
             index = class_to_index_map.insert(entry).first->index;
@@ -1133,6 +1141,60 @@
   temp_scoped_alloc_.reset();
 }
 
+bool MIRGraph::ApplyGlobalValueNumberingGate() {
+  if ((cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0) {
+    return false;
+  }
+
+  if ((merged_df_flags_ & DF_LVN) == 0) {
+    return false;
+  }
+
+  DCHECK(temp_scoped_alloc_ == nullptr);
+  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+  DCHECK(temp_gvn_ == nullptr);
+  temp_gvn_.reset(
+      new (temp_scoped_alloc_.get()) GlobalValueNumbering(cu_, temp_scoped_alloc_.get()));
+  return true;
+}
+
+bool MIRGraph::ApplyGlobalValueNumbering(BasicBlock* bb) {
+  DCHECK(temp_gvn_ != nullptr);
+  LocalValueNumbering* lvn = temp_gvn_->PrepareBasicBlock(bb);
+  if (lvn != nullptr) {
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      lvn->GetValueNumber(mir);
+    }
+  }
+  bool change = (lvn != nullptr) && temp_gvn_->FinishBasicBlock(bb);
+  return change;
+}
+
+void MIRGraph::ApplyGlobalValueNumberingEnd() {
+  // Perform modifications.
+  if (temp_gvn_->Good()) {
+    temp_gvn_->AllowModifications();
+    PreOrderDfsIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+      LocalValueNumbering* lvn = temp_gvn_->PrepareBasicBlock(bb);
+      if (lvn != nullptr) {
+        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+          lvn->GetValueNumber(mir);
+        }
+        bool change = temp_gvn_->FinishBasicBlock(bb);
+        DCHECK(!change);
+      }
+    }
+  } else {
+    LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+  }
+
+  DCHECK(temp_gvn_ != nullptr);
+  temp_gvn_.reset();
+  DCHECK(temp_scoped_alloc_ != nullptr);
+  temp_scoped_alloc_.reset();
+}
+
 void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MIR* iget_or_iput) {
   uint32_t method_index = invoke->meta.method_lowering_info;
   if (temp_bit_vector_->IsBitSet(method_index)) {
@@ -1158,7 +1220,7 @@
   iget_or_iput->meta.ifield_lowering_info = field_info_index;
 }
 
-bool MIRGraph::InlineCallsGate() {
+bool MIRGraph::InlineSpecialMethodsGate() {
   if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 ||
       method_lowering_infos_.Size() == 0u) {
     return false;
@@ -1170,7 +1232,7 @@
   return true;
 }
 
-void MIRGraph::InlineCallsStart() {
+void MIRGraph::InlineSpecialMethodsStart() {
   // Prepare for inlining getters/setters. Since we're inlining at most 1 IGET/IPUT from
   // each INVOKE, we can index the data by the MIR::meta::method_lowering_info index.
 
@@ -1184,7 +1246,7 @@
       temp_bit_vector_size_ * sizeof(*temp_insn_data_), kArenaAllocGrowableArray));
 }
 
-void MIRGraph::InlineCalls(BasicBlock* bb) {
+void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
@@ -1208,17 +1270,17 @@
     MethodReference target = method_info.GetTargetMethod();
     if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(target.dex_file)
             ->GenInline(this, bb, mir, target.dex_method_index)) {
-      if (cu_->verbose) {
-        LOG(INFO) << "In \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-            << "\" @0x" << std::hex << mir->offset
-            << " inlined " << method_info.GetInvokeType() << " (" << sharp_type << ") call to \""
-            << PrettyMethod(target.dex_method_index, *target.dex_file) << "\"";
+      if (cu_->verbose || cu_->print_pass) {
+        LOG(INFO) << "SpecialMethodInliner: Inlined " << method_info.GetInvokeType() << " ("
+            << sharp_type << ") call to \"" << PrettyMethod(target.dex_method_index, *target.dex_file)
+            << "\" from \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
+            << "\" @0x" << std::hex << mir->offset;
       }
     }
   }
 }
 
-void MIRGraph::InlineCallsEnd() {
+void MIRGraph::InlineSpecialMethodsEnd() {
   DCHECK(temp_insn_data_ != nullptr);
   temp_insn_data_ = nullptr;
   DCHECK(temp_bit_vector_ != nullptr);
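The new ApplyGlobalValueNumberingGate / ApplyGlobalValueNumbering / ApplyGlobalValueNumberingEnd trio above follows the same gate / per-block / end shape as the other MIRGraph passes: the analysis is repeated until no block reports a change, and only then does End() allow modifications. A hedged, self-contained sketch of how such a pass can be driven; the real ART pass driver uses its own Pass and iterator classes, so the interface below is illustrative only:

#include <vector>

struct Block {
  int id;
};

class ThreePhasePass {
 public:
  virtual ~ThreePhasePass() {}
  virtual bool Gate() = 0;                 // Allocate temporaries, decide applicability.
  virtual bool VisitBlock(Block* bb) = 0;  // Analyze one block, report whether anything changed.
  virtual void End() = 0;                  // Apply modifications and release temporaries.
};

// Drive the pass: iterate the analysis to a fixed point, then run the final phase.
void Drive(ThreePhasePass* pass, std::vector<Block*>* blocks) {
  if (!pass->Gate()) {
    return;
  }
  bool change = true;
  while (change) {
    change = false;
    for (Block* bb : *blocks) {
      change |= pass->VisitBlock(bb);
    }
  }
  pass->End();
}

Keeping the code unmodified until convergence is what temp_gvn_->AllowModifications() guards in the End() hook above.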
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 9b2e798..8c70b5c 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -248,7 +248,7 @@
       DEF_MIR(Instruction::SGET, 3u, 4u),
   };
   static const bool expected_ignore_clinit_check[] = {
-      false, false, false, false, false, true, true, true, false, false, true
+      false, false, false, false, true, true, true, true, true, false, true
   };
 
   PrepareSFields(sfields);
@@ -312,7 +312,7 @@
       DEF_MIR(Instruction::SPUT, 6u, 9u),  // Eliminated (with sfield[8] in block #4).
   };
   static const bool expected_ignore_clinit_check[] = {
-      false, false,         // Unresolved: sfield[10], method[2]
+      false, true,          // Unresolved: sfield[10], method[2]
       false, true,          // sfield[0]
       false, false,         // sfield[1]
       false, true,          // sfield[2]
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 52a2273..c72a4a6 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -35,10 +35,11 @@
 const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = {
   GetPassInstance<CacheFieldLoweringInfo>(),
   GetPassInstance<CacheMethodLoweringInfo>(),
-  GetPassInstance<CallInlining>(),
+  GetPassInstance<SpecialMethodInliner>(),
   GetPassInstance<CodeLayout>(),
   GetPassInstance<NullCheckEliminationAndTypeInference>(),
   GetPassInstance<ClassInitCheckElimination>(),
+  GetPassInstance<GlobalValueNumberingPass>(),
   GetPassInstance<BBCombine>(),
   GetPassInstance<BBOptimizations>(),
 };
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index dafefea..6dc019a 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -766,8 +766,9 @@
 
 // Generate code for all slow paths.
 void Mir2Lir::HandleSlowPaths() {
-  int n = slow_paths_.Size();
-  for (int i = 0; i < n; ++i) {
+  // Re-check slow_paths_.Size() on every iteration, because slowpath->Compile()
+  // may create new slow paths.
+  for (size_t i = 0; i < slow_paths_.Size(); ++i) {
     LIRSlowPath* slowpath = slow_paths_.Get(i);
     slowpath->Compile();
   }
@@ -819,11 +820,12 @@
     } else {
       GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
     }
+    // FIXME: pGetXXInstance entrypoints always return an int or int64 regardless of rl_dest.fp.
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
+      RegLocation rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
+      RegLocation rl_result = GetReturn(rl_dest.ref ? kRefReg : kCoreReg);
       StoreValue(rl_dest, rl_result);
     }
   }
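The HandleSlowPaths change above is the classic "container grows while you iterate it" fix. A self-contained illustration of why the bound must be re-read each iteration; this is not ART code and the SlowPath struct is made up:

#include <cstddef>
#include <cstdio>
#include <vector>

struct SlowPath {
  int extra_paths_to_add;  // Pretend "compiling" this slow path emits this many new ones.
};

int main() {
  std::vector<SlowPath> slow_paths = {{2}, {0}};
  // Re-evaluate slow_paths.size() on every iteration, as the fixed loop above now does;
  // a bound cached before the loop would skip the paths appended below.
  for (std::size_t i = 0; i < slow_paths.size(); ++i) {
    for (int n = 0; n < slow_paths[i].extra_paths_to_add; ++n) {
      slow_paths.push_back({0});
    }
    std::printf("compiled slow path %zu of %zu\n", i, slow_paths.size());
  }
  return 0;
}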
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 4e973d8..8df5b6d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -327,6 +327,13 @@
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
+// Special encoding for Movzx8 and Movsx8: the r8_form applies to the second register only.
+#define EXT_0F_R8_FORM_ENCODING_MAP(opname, prefix, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, true }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
 #define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -488,9 +495,9 @@
   { kX86LockCmpxchg64A, kArray,   IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES,  { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
   { kX86XchgMR, kMemReg,          IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02,          { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
 
-  EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
+  EXT_0F_R8_FORM_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movsx8,  0x00, 0xBE, REG_DEF0),
+  EXT_0F_R8_FORM_ENCODING_MAP(Movsx8,  0x00, 0xBE, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx8q,  REX_W, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16q, REX_W, 0xB7, REG_DEF0),
@@ -593,6 +600,10 @@
   }
 }
 
+static bool IsByteSecondOperand(const X86EncodingMap* entry) {
+  return StartsWith(entry->name, "Movzx8") || StartsWith(entry->name, "Movsx8");
+}
+
 size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
                                int32_t raw_base, int32_t displacement) {
   bool has_modrm = HasModrm(entry);
@@ -613,7 +624,8 @@
     bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
     if (r8_form) {
       // Do we need an empty REX prefix to normalize byte registers?
-      registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4);
+      registers_need_rex_prefix = registers_need_rex_prefix ||
+          (RegStorage::RegNum(raw_reg) >= 4 && !IsByteSecondOperand(entry));
       registers_need_rex_prefix = registers_need_rex_prefix ||
           (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
     }
@@ -877,7 +889,7 @@
   uint8_t rex = 0;
   if (r8_form) {
     // Do we need an empty REX prefix to normalize byte register addressing?
-    if (RegStorage::RegNum(raw_reg_r) >= 4) {
+    if (RegStorage::RegNum(raw_reg_r) >= 4 && !IsByteSecondOperand(entry)) {
       rex |= 0x40;  // REX.0000
     } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
       rex |= 0x40;  // REX.0000
@@ -1167,7 +1179,9 @@
 }
 
 void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
-  CheckValidByteRegister(entry, raw_reg1);
+  if (!IsByteSecondOperand(entry)) {
+    CheckValidByteRegister(entry, raw_reg1);
+  }
   CheckValidByteRegister(entry, raw_reg2);
   EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
   uint8_t low_reg1 = LowRegisterBits(raw_reg1);
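The Movzx8/Movsx8 changes above rest on the fact that only operands actually encoded as 8-bit registers are subject to the byte-register REX normalization; the MOVZX/MOVSX destination is a full-width register, so its register number being >= 4 no longer forces an empty REX prefix. A hedged sketch of that rule; the function name and examples are illustrative, not the ART encoder's:

// True when an "empty" REX prefix (0x40) is required to address the low byte of a register:
// only operands actually encoded as 8-bit registers with register numbers >= 4 need it, so
// that SPL/BPL/SIL/DIL are selected rather than AH/CH/DH/BH in 64-bit mode.
inline bool NeedsEmptyRexForByteReg(int reg_num, bool operand_encoded_as_byte) {
  return operand_encoded_as_byte && reg_num >= 4;
}

// Examples (illustrative):
//   movzx edi, bl  - destination reg 7 is not byte-encoded, source reg 3 < 4: no REX needed.
//   mov   sil, al  - destination reg 6 is byte-encoded: the empty REX prefix is required.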
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index cf29e52..f1166f6 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -761,54 +761,59 @@
 }
 
 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  return false;
-// Turned off until tests available in Art.
-//
-//  RegLocation rl_src_address = info->args[0];  // long address
-//  RegLocation rl_address;
-//  if (!cu_->target64) {
-//    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
-//    rl_address = LoadValue(rl_src_address, kCoreReg);
-//  } else {
-//    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-//  }
-//  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
-//  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-//  // Unaligned access is allowed on x86.
-//  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-//  if (size == k64) {
-//    StoreValueWide(rl_dest, rl_result);
-//  } else {
-//    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-//    StoreValue(rl_dest, rl_result);
-//  }
-//  return true;
+  RegLocation rl_src_address = info->args[0];  // long address
+  RegLocation rl_address;
+  if (!cu_->target64) {
+    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
+    rl_address = LoadValue(rl_src_address, kCoreReg);
+  } else {
+    rl_address = LoadValueWide(rl_src_address, kCoreReg);
+  }
+  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  // Unaligned access is allowed on x86.
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
+  if (size == k64) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
 }
 
 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  return false;
-// Turned off until tests available in Art.
-//
-//  RegLocation rl_src_address = info->args[0];  // long address
-//  RegLocation rl_address;
-//  if (!cu_->target64) {
-//    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
-//    rl_address = LoadValue(rl_src_address, kCoreReg);
-//  } else {
-//    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-//  }
-//  RegLocation rl_src_value = info->args[2];  // [size] value
-//  if (size == k64) {
-//    // Unaligned access is allowed on x86.
-//    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-//    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-//  } else {
-//    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-//    // Unaligned access is allowed on x86.
-//    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-//    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-//  }
-//  return true;
+  RegLocation rl_src_address = info->args[0];  // long address
+  RegLocation rl_address;
+  if (!cu_->target64) {
+    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
+    rl_address = LoadValue(rl_src_address, kCoreReg);
+  } else {
+    rl_address = LoadValueWide(rl_src_address, kCoreReg);
+  }
+  RegLocation rl_src_value = info->args[2];  // [size] value
+  RegLocation rl_value;
+  if (size == k64) {
+    // Unaligned access is allowed on x86.
+    rl_value = LoadValueWide(rl_src_value, kCoreReg);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
+    if (!cu_->target64 && size == kSignedByte) {
+      rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
+      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
+        RegStorage temp = AllocateByteRegister();
+        OpRegCopy(temp, rl_src_value.reg);
+        rl_value.reg = temp;
+      } else {
+        rl_value = LoadValue(rl_src_value, kCoreReg);
+      }
+    } else {
+      rl_value = LoadValue(rl_src_value, kCoreReg);
+    }
+  }
+  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
+  return true;
 }
 
 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
@@ -831,14 +836,12 @@
 
 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-  if (cu_->instruction_set == kX86_64) {
-    return false;  // TODO: Verify working on x86-64.
-  }
-
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
   RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  if (!cu_->target64) {
+    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  }
   RegLocation rl_src_expected = info->args[4];  // int, long or Object
   // If is_long, high half is in info->args[5]
   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
@@ -846,21 +849,21 @@
 
   if (is_long && cu_->target64) {
     // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
-    FlushReg(rs_r0);
-    Clobber(rs_r0);
-    LockTemp(rs_r0);
+    FlushReg(rs_r0q);
+    Clobber(rs_r0q);
+    LockTemp(rs_r0q);
 
     RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
     RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
-    RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-    LoadValueDirectWide(rl_src_expected, rs_r0);
+    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
+    LoadValueDirectWide(rl_src_expected, rs_r0q);
     NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
     GenMemBarrier(kStoreLoad);
 
-    FreeTemp(rs_r0);
+    FreeTemp(rs_r0q);
   } else if (is_long) {
     // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
     // TODO: CFI support.
@@ -942,7 +945,12 @@
       LockTemp(rs_r0);
     }
 
-    RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+    RegLocation rl_offset;
+    if (cu_->target64) {
+      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
+    } else {
+      rl_offset = LoadValue(rl_src_offset, kCoreReg);
+    }
     LoadValueDirect(rl_src_expected, rs_r0);
     NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
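
For orientation, the following is a minimal non-ART sketch (illustrative names only) of the contract the re-enabled 64-bit CAS path implements: a sequentially consistent compare-and-swap whose full-barrier behaviour matches LOCK CMPXCHG, which is why only a scheduling barrier is emitted afterwards.

// Hedged sketch, not ART code: the semantics GenInlinedCas inlines for a
// 64-bit field, expressed with std::atomic. LOCK CMPXCHG provides a full
// barrier, which is what memory_order_seq_cst requests here.
#include <atomic>
#include <cstdint>

bool CompareAndSwap64(std::atomic<int64_t>* addr, int64_t expected, int64_t desired) {
  // compare_exchange_strong rewrites 'expected' on failure; this sketch only
  // reports the success flag, mirroring Unsafe.compareAndSwapLong.
  return addr->compare_exchange_strong(expected, desired, std::memory_order_seq_cst);
}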
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index e1b6992..4b6d501 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -354,12 +354,12 @@
         bool deduped = false;
 
         // Deduplicate code arrays.
-        auto code_iter = dedupe_map_.find(compiled_method);
-        if (code_iter != dedupe_map_.end()) {
-          quick_code_offset = code_iter->second;
+        auto lb = dedupe_map_.lower_bound(compiled_method);
+        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) {
+          quick_code_offset = lb->second;
           deduped = true;
         } else {
-          dedupe_map_.Put(compiled_method, quick_code_offset);
+          dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset);
         }
 
         // Update quick method header.
@@ -386,7 +386,7 @@
                                               code_size);
 
         // Update checksum if this wasn't a duplicate.
-        if (code_iter == dedupe_map_.end()) {
+        if (!deduped) {
           writer_->oat_header_->UpdateChecksum(method_header, sizeof(*method_header));
           offset_ += sizeof(*method_header);  // Method header is prepended before code.
           writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
@@ -485,12 +485,12 @@
       const std::vector<uint8_t>* map = DataAccess::GetData(compiled_method);
       uint32_t map_size = map->size() * sizeof((*map)[0]);
       if (map_size != 0u) {
-        auto it = dedupe_map_.find(map);
-        if (it != dedupe_map_.end()) {
-          DataAccess::SetOffset(oat_class, method_offsets_index_, it->second);
+        auto lb = dedupe_map_.lower_bound(map);
+        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(map, lb->first)) {
+          DataAccess::SetOffset(oat_class, method_offsets_index_, lb->second);
         } else {
           DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
-          dedupe_map_.Put(map, offset_);
+          dedupe_map_.PutBefore(lb, map, offset_);
           offset_ += map_size;
           writer_->oat_header_->UpdateChecksum(&(*map)[0], map_size);
         }
diff --git a/runtime/atomic.h b/runtime/atomic.h
index ed83a33..5ddafb4 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -343,6 +343,14 @@
     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
   }
 
+  T FetchAndOrSequentiallyConsistent(const T value) {
+    return this->fetch_or(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
+  T FetchAndAndSequentiallyConsistent(const T value) {
+    return this->fetch_and(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
   volatile T* Address() {
     return reinterpret_cast<T*>(this);
   }
@@ -382,6 +390,20 @@
   }
 };
 
+// Interpret the bit pattern of input (type U) as type V. Requires the size
+// of V >= size of U (compile-time checked).
+// Reproduced here from utils.h to keep dependencies small.
+template<typename U, typename V>
+static inline V bit_cast_atomic(U in) {
+  COMPILE_ASSERT(sizeof(U) == sizeof(V), size_of_u_not_eq_size_of_v);
+  union {
+    U u;
+    V v;
+  } tmp;
+  tmp.u = in;
+  return tmp.v;
+}
+
 template<class T> struct AtomicHelper<8, T> {
   friend class Atomic<T>;
 
@@ -392,15 +414,14 @@
     // sizeof(T) == 8
     volatile const int64_t* loc_ptr =
               reinterpret_cast<volatile const int64_t*>(loc);
-    return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
+    return bit_cast_atomic<int64_t, T>(QuasiAtomic::Read64(loc_ptr));
   }
 
   static void StoreRelaxed(volatile T* loc, T desired) {
     // sizeof(T) == 8
     volatile int64_t* loc_ptr =
                 reinterpret_cast<volatile int64_t*>(loc);
-    QuasiAtomic::Write64(loc_ptr,
-                         static_cast<int64_t>(desired));
+    QuasiAtomic::Write64(loc_ptr, bit_cast_atomic<T, int64_t>(desired));
   }
 
 
@@ -408,14 +429,14 @@
                                                   T expected_value, T desired_value) {
     // sizeof(T) == 8
     volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
-    return QuasiAtomic::Cas64(
-                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
-                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
+    return QuasiAtomic::Cas64(bit_cast_atomic<T, int64_t>(expected_value),
+                              bit_cast_atomic<T, int64_t>(desired_value),
+                              loc_ptr);
   }
 };
 
 template<typename T>
-class Atomic {
+class PACKED(sizeof(T)) Atomic {
  private:
   COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
 
@@ -521,6 +542,30 @@
     }
   }
 
+  T FetchAndOrSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_or(&value_, value);  // Return old value.
+    } else {
+      T expected;
+      do {
+        expected = LoadRelaxed();
+      } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected | value));
+      return expected;
+    }
+  }
+
+  T FetchAndAndSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_and(&value_, value);  // Return old value.
+    } else {
+      T expected;
+      do {
+        expected = LoadRelaxed();
+      } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected & value));
+      return expected;
+    }
+  }
+
   T operator++() {  // Prefix operator.
     if (sizeof(T) <= 4) {
       return __sync_add_and_fetch(&value_, 1);  // Return new value.
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 1890181..3e5cdba 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -23,7 +23,6 @@
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 
-#include "cutils/atomic-inline.h"
 #include "cutils/trace.h"
 
 #include "base/stringprintf.h"
@@ -152,20 +151,20 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state >= 0)) {
       // Add as an extra reader.
-      done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0;
+      done = state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
     } else {
       // Owner holds it exclusively, hang up.
       ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
-      android_atomic_inc(&num_pending_readers_);
-      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+      ++num_pending_readers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
         if (errno != EAGAIN) {
           PLOG(FATAL) << "futex wait failed for " << name_;
         }
       }
-      android_atomic_dec(&num_pending_readers_);
+      --num_pending_readers_;
     }
   } while (!done);
 #else
@@ -184,14 +183,18 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state > 0)) {
-      // Reduce state by 1.
-      done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
-      if (done && (cur_state - 1) == 0) {  // cas may fail due to noise?
-        if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) {
+      // Reduce state by 1 and impose lock release load/store ordering.
+      // Note: the relaxed loads below mustn't reorder before the CompareExchange.
+      // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+      // a status bit into the state on contention.
+      done = state_.CompareExchangeWeakSequentiallyConsistent(cur_state, cur_state - 1);
+      if (done && (cur_state - 1) == 0) {  // Weak CAS may fail spuriously.
+        if (num_pending_writers_.LoadRelaxed() > 0 ||
+            num_pending_readers_.LoadRelaxed() > 0) {
           // Wake any exclusive waiters as there are now no readers.
-          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+          futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
     } else {
@@ -233,7 +236,7 @@
 
 inline uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
 #if ART_USE_FUTEXES
-  int32_t state = state_;
+  int32_t state = state_.LoadRelaxed();
   if (state == 0) {
     return 0;  // No owner.
   } else if (state > 0) {
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index fd1eb12..7779547 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -30,6 +30,7 @@
 namespace art {
 
 Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::allocated_monitor_ids_lock_ = nullptr;
 Mutex* Locks::allocated_thread_ids_lock_ = nullptr;
 Mutex* Locks::breakpoint_lock_ = nullptr;
 ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
@@ -262,7 +263,7 @@
 Mutex::Mutex(const char* name, LockLevel level, bool recursive)
     : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
 #if ART_USE_FUTEXES
-  state_ = 0;
+  DCHECK_EQ(0, state_.LoadRelaxed());
   DCHECK_EQ(0, num_contenders_.LoadRelaxed());
 #else
   CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
@@ -272,13 +273,13 @@
 
 Mutex::~Mutex() {
 #if ART_USE_FUTEXES
-  if (state_ != 0) {
+  if (state_.LoadRelaxed() != 0) {
     Runtime* runtime = Runtime::Current();
     bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
-    CHECK_EQ(num_contenders_.LoadRelaxed(), 0)
+    CHECK_EQ(num_contenders_.LoadSequentiallyConsistent(), 0)
         << "unexpectedly found a contender on mutex " << name_;
   }
 #else
@@ -305,15 +306,15 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (LIKELY(cur_state == 0)) {
-        // Change state from 0 to 1.
-        done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+        // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+        done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
       } else {
         // Failed to acquire, hang up.
         ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
         num_contenders_++;
-        if (futex(&state_, FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
+        if (futex(state_.Address(), FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
           // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
           // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
           if ((errno != EAGAIN) && (errno != EINTR)) {
@@ -323,11 +324,7 @@
         num_contenders_--;
       }
     } while (!done);
-    // We assert that no memory fence is needed here, since
-    // __sync_bool_compare_and_swap includes it.
-    // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation
-    // that exposes the ordering semantics.
-    DCHECK_EQ(state_, 1);
+    DCHECK_EQ(state_.LoadRelaxed(), 1);
 #else
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
 #endif
@@ -352,16 +349,15 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (cur_state == 0) {
-        // Change state from 0 to 1.
-        done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+        // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+        done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
       } else {
         return false;
       }
     } while (!done);
-    // We again assert no memory fence is needed.
-    DCHECK_EQ(state_, 1);
+    DCHECK_EQ(state_.LoadRelaxed(), 1);
 #else
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
@@ -399,17 +395,19 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (LIKELY(cur_state == 1)) {
-        // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
         // We're no longer the owner.
         exclusive_owner_ = 0;
-        // Change state to 0.
-        done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
+        // Change state to 0 and impose load/store ordering appropriate for lock release.
+        // Note: the relaxed loads below mustn't reorder before the CompareExchange.
+        // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+        // a status bit into the state on contention.
+        done =  state_.CompareExchangeWeakSequentiallyConsistent(cur_state, 0 /* new state */);
         if (LIKELY(done)) {  // Spurious fail?
-          // Wake a contender
+          // Wake a contender.
           if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
-            futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+            futex(state_.Address(), FUTEX_WAKE, 1, NULL, NULL, 0);
           }
         }
       } else {
@@ -459,9 +457,9 @@
 
 ReaderWriterMutex::~ReaderWriterMutex() {
 #if ART_USE_FUTEXES
-  CHECK_EQ(state_, 0);
+  CHECK_EQ(state_.LoadRelaxed(), 0);
   CHECK_EQ(exclusive_owner_, 0U);
-  CHECK_EQ(num_pending_readers_, 0);
+  CHECK_EQ(num_pending_readers_.LoadRelaxed(), 0);
   CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -484,25 +482,25 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state == 0)) {
-      // Change state from 0 to -1.
-      done =  __sync_bool_compare_and_swap(&state_, 0 /* cur_state*/, -1 /* new state */);
+      // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(0 /* cur_state*/, -1 /* new state */);
     } else {
       // Failed to acquire, hang up.
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      num_pending_writers_++;
-      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+      ++num_pending_writers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
         // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
         // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
         if ((errno != EAGAIN) && (errno != EINTR)) {
           PLOG(FATAL) << "futex wait failed for " << name_;
         }
       }
-      num_pending_writers_--;
+      --num_pending_writers_;
     }
   } while (!done);
-  DCHECK_EQ(state_, -1);
+  DCHECK_EQ(state_.LoadRelaxed(), -1);
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
 #endif
@@ -520,16 +518,20 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state == -1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
-      // Change state from -1 to 0.
-      done =  __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */);
-      if (LIKELY(done)) {  // cmpxchg may fail due to noise?
+      // Change state from -1 to 0 and impose load/store ordering appropriate for lock release.
+      // Note: the relaxed loads below mustn't reorder before the CompareExchange.
+      // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+      // a status bit into the state on contention.
+      done =  state_.CompareExchangeWeakSequentiallyConsistent(-1 /* cur_state*/, 0 /* new state */);
+      if (LIKELY(done)) {  // Weak CAS may fail spuriously.
         // Wake any waiters.
-        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) {
-          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+        if (UNLIKELY(num_pending_readers_.LoadRelaxed() > 0 ||
+                     num_pending_writers_.LoadRelaxed() > 0)) {
+          futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
     } else {
@@ -550,10 +552,10 @@
   timespec end_abs_ts;
   InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &end_abs_ts);
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (cur_state == 0) {
-      // Change state from 0 to -1.
-      done =  __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, -1 /* new state */);
+      // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(0 /* cur_state */, -1 /* new state */);
     } else {
       // Failed to acquire, hang up.
       timespec now_abs_ts;
@@ -563,10 +565,10 @@
         return false;  // Timed out.
       }
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      num_pending_writers_++;
-      if (futex(&state_, FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
+      ++num_pending_writers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
         if (errno == ETIMEDOUT) {
-          num_pending_writers_--;
+          --num_pending_writers_;
           return false;  // Timed out.
         } else if ((errno != EAGAIN) && (errno != EINTR)) {
           // EAGAIN and EINTR both indicate a spurious failure,
@@ -575,7 +577,7 @@
           PLOG(FATAL) << "timed futex wait failed for " << name_;
         }
       }
-      num_pending_writers_--;
+      --num_pending_writers_;
     }
   } while (!done);
 #else
@@ -602,10 +604,10 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (cur_state >= 0) {
-      // Add as an extra reader.
-      done =  __sync_bool_compare_and_swap(&state_, cur_state, cur_state + 1);
+      // Add as an extra reader and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
     } else {
       // Owner holds it exclusively.
       return false;
@@ -702,7 +704,7 @@
       // mutex unlocks will awaken the requeued waiter thread.
       done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
                    reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()),
-                   &guard_.state_, cur_sequence) != -1;
+                   guard_.state_.Address(), cur_sequence) != -1;
       if (!done) {
         if (errno != EAGAIN) {
           PLOG(FATAL) << "futex cmp requeue failed for " << name_;
@@ -831,6 +833,7 @@
       DCHECK(modify_ldt_lock_ == nullptr);
     }
     DCHECK(abort_lock_ != nullptr);
+    DCHECK(allocated_monitor_ids_lock_ != nullptr);
     DCHECK(allocated_thread_ids_lock_ != nullptr);
     DCHECK(breakpoint_lock_ != nullptr);
     DCHECK(classlinker_classes_lock_ != nullptr);
@@ -882,6 +885,10 @@
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
                                                       current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kMonitorPoolLock);
+    DCHECK(allocated_monitor_ids_lock_ == nullptr);
+    allocated_monitor_ids_lock_ =  new Mutex("allocated monitor ids lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock);
     DCHECK(allocated_thread_ids_lock_ == nullptr);
     allocated_thread_ids_lock_ =  new Mutex("allocated thread ids lock", current_lock_level);
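
The state_ conversion in mutex.cc follows a standard acquire/release CAS pattern. Below is a minimal sketch, assuming a bare std::atomic and omitting the futex wait/wake path; note the real unlock uses a sequentially consistent CAS because it must also order the contender-count loads that follow it.

// Sketch with assumed names: weak CAS with acquire ordering to take the lock,
// and a release operation to give it back.
#include <atomic>
#include <cstdint>

class TinyLock {
 public:
  void Lock() {
    int32_t expected = 0;
    // Weak CAS may fail spuriously, so retry; acquire ordering pairs with the
    // release in Unlock() so the critical section cannot float above the lock.
    while (!state_.compare_exchange_weak(expected, 1, std::memory_order_acquire)) {
      expected = 0;  // compare_exchange_weak overwrote it with the current value.
    }
  }
  void Unlock() {
    // The patch itself uses a sequentially consistent CAS here; a release
    // store is the textbook form for a simple lock.
    state_.store(0, std::memory_order_release);
  }
 private:
  std::atomic<int32_t> state_{0};
};
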
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 81e62ab..8d2cd07 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -70,7 +70,6 @@
   kMarkSweepMarkStackLock,
   kTransactionLogLock,
   kInternTableLock,
-  kMonitorPoolLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
   kPinTableLock,
@@ -78,6 +77,7 @@
   kJdwpObjectRegistryLock,
   kModifyLdtLock,
   kAllocatedThreadIdsLock,
+  kMonitorPoolLock,
   kClassLinkerClassesLock,
   kBreakpointLock,
   kMonitorLock,
@@ -226,7 +226,8 @@
   }
   void AssertNotHeld(const Thread* self) { AssertNotHeldExclusive(self); }
 
-  // Id associated with exclusive owner.
+  // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+  // than the owner.
   uint64_t GetExclusiveOwnerTid() const;
 
   // Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
@@ -239,7 +240,7 @@
  private:
 #if ART_USE_FUTEXES
   // 0 is unheld, 1 is held.
-  volatile int32_t state_;
+  AtomicInteger state_;
   // Exclusive owner.
   volatile uint64_t exclusive_owner_;
   // Number of waiting contenders.
@@ -343,7 +344,8 @@
     }
   }
 
-  // Id associated with exclusive owner.
+  // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+  // than the owner.
   uint64_t GetExclusiveOwnerTid() const;
 
   virtual void Dump(std::ostream& os) const;
@@ -351,12 +353,12 @@
  private:
 #if ART_USE_FUTEXES
   // -1 implies held exclusive, +ve shared held by state_ many owners.
-  volatile int32_t state_;
-  // Exclusive owner.
+  AtomicInteger state_;
+  // Exclusive owner. Modification guarded by this mutex.
   volatile uint64_t exclusive_owner_;
-  // Pending readers.
-  volatile int32_t num_pending_readers_;
-  // Pending writers.
+  // Number of contenders waiting for a reader share.
+  AtomicInteger num_pending_readers_;
+  // Number of contenders waiting to be the writer.
   AtomicInteger num_pending_writers_;
 #else
   pthread_rwlock_t rwlock_;
@@ -558,8 +560,10 @@
   // doesn't try to hold a higher level Mutex.
   #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
 
+  static Mutex* allocated_monitor_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
   // Guard the allocation/deallocation of thread ids.
-  static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+  static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(allocated_monitor_ids_lock_);
 
   // Guards modification of the LDT on x86.
   static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
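
The LockLevel reshuffle and the ACQUIRED_AFTER annotations rely on the usual rank-ordered locking discipline. A standalone sketch of that idea, with illustrative names and assuming strictly nested (LIFO) lock/unlock, might look like this:

// Illustrative sketch, not the ART implementation: each lock carries a rank,
// and a thread_local records the rank of the most recently acquired lock so
// out-of-order acquisition can be caught in debug builds.
#include <cassert>
#include <mutex>

thread_local int g_highest_held_rank = -1;

class RankedMutex {
 public:
  explicit RankedMutex(int rank) : rank_(rank) {}
  void Lock() {
    // Require strictly increasing ranks per thread; a violation here is the
    // kind of inversion that moving kMonitorPoolLock is meant to rule out.
    assert(rank_ > g_highest_held_rank && "lock acquired out of level order");
    mu_.lock();
    saved_rank_ = g_highest_held_rank;
    g_highest_held_rank = rank_;
  }
  void Unlock() {
    // Assumes stack-like lock/unlock, so restoring the saved rank is enough.
    g_highest_held_rank = saved_rank_;
    mu_.unlock();
  }
 private:
  std::mutex mu_;
  const int rank_;
  int saved_rank_ = -1;
};
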
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index fdbc9c2..289dc1d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -26,7 +26,7 @@
 #include <fstream>
 #include <memory>
 
-#include "../../external/icu4c/common/unicode/uvernum.h"
+#include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6161aff..c95be01 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1996,13 +1996,14 @@
     case kTerminated:
       return JDWP::TS_ZOMBIE;
     case kTimedWaiting:
+    case kWaitingForCheckPointsToRun:
     case kWaitingForDebuggerSend:
     case kWaitingForDebuggerSuspension:
     case kWaitingForDebuggerToAttach:
     case kWaitingForDeoptimization:
     case kWaitingForGcToComplete:
-    case kWaitingForCheckPointsToRun:
     case kWaitingForJniOnLoad:
+    case kWaitingForMethodTracingStart:
     case kWaitingForSignalCatcherOutput:
     case kWaitingInMainDebuggerLoop:
     case kWaitingInMainSignalCatcherLoop:
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index a1d001e..ad0a4f43 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -17,9 +17,9 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 
+#include "atomic.h"
 #include "base/logging.h"
 #include "card_table.h"
-#include "cutils/atomic-inline.h"
 #include "space_bitmap.h"
 #include "utils.h"
 
@@ -28,18 +28,23 @@
 namespace accounting {
 
 static inline bool byte_cas(byte old_value, byte new_value, byte* address) {
+#if defined(__i386__) || defined(__x86_64__)
+  Atomic<byte>* byte_atomic = reinterpret_cast<Atomic<byte>*>(address);
+  return byte_atomic->CompareExchangeWeakRelaxed(old_value, new_value);
+#else
   // Little endian means most significant byte is on the left.
   const size_t shift_in_bytes = reinterpret_cast<uintptr_t>(address) % sizeof(uintptr_t);
   // Align the address down.
   address -= shift_in_bytes;
   const size_t shift_in_bits = shift_in_bytes * kBitsPerByte;
-  int32_t* word_address = reinterpret_cast<int32_t*>(address);
+  AtomicInteger* word_atomic = reinterpret_cast<AtomicInteger*>(address);
+
   // Word with the byte we are trying to cas cleared.
-  const int32_t cur_word = *word_address & ~(0xFF << shift_in_bits);
+  const int32_t cur_word = word_atomic->LoadRelaxed() & ~(0xFF << shift_in_bits);
   const int32_t old_word = cur_word | (static_cast<int32_t>(old_value) << shift_in_bits);
   const int32_t new_word = cur_word | (static_cast<int32_t>(new_value) << shift_in_bits);
-  bool success = android_atomic_cas(old_word, new_word, word_address) == 0;
-  return success;
+  return word_atomic->CompareExchangeWeakRelaxed(old_word, new_word);
+#endif
 }
 
 template <typename Visitor>
@@ -174,8 +179,8 @@
       for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
         new_bytes[i] = visitor(expected_bytes[i]);
       }
-      if (LIKELY(android_atomic_cas(expected_word, new_word,
-                                    reinterpret_cast<int32_t*>(word_cur)) == 0)) {
+      Atomic<uintptr_t>* atomic_word = reinterpret_cast<Atomic<uintptr_t>*>(word_cur);
+      if (LIKELY(atomic_word->CompareExchangeWeakRelaxed(expected_word, new_word))) {
         for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
           const byte expected_byte = expected_bytes[i];
           const byte new_byte = new_bytes[i];
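
The non-x86 branch of byte_cas is an instance of emulating a narrow CAS with a CAS on the containing aligned word. A self-contained sketch of the same trick follows; it assumes little-endian layout and, like the patch, reinterprets the aligned address as an atomic word, which is implementation-specific rather than portable C++.

#include <atomic>
#include <cstdint>

bool ByteCas(uint8_t old_value, uint8_t new_value, uint8_t* address) {
  const size_t shift_in_bytes = reinterpret_cast<uintptr_t>(address) % sizeof(uint32_t);
  uint8_t* aligned = address - shift_in_bytes;  // Align down to the word start.
  const uint32_t shift_in_bits = static_cast<uint32_t>(shift_in_bytes * 8);
  auto* word = reinterpret_cast<std::atomic<uint32_t>*>(aligned);
  // Build expected/desired words around the current contents of the other bytes.
  const uint32_t cur = word->load(std::memory_order_relaxed) & ~(0xFFu << shift_in_bits);
  uint32_t expected = cur | (static_cast<uint32_t>(old_value) << shift_in_bits);
  const uint32_t desired = cur | (static_cast<uint32_t>(new_value) << shift_in_bits);
  // Weak CAS: may fail spuriously or because a neighbouring byte changed; the
  // card-table caller tolerates failure, as does this sketch.
  return word->compare_exchange_weak(expected, desired, std::memory_order_relaxed);
}
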
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 7f1da79..1e9556a 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -21,6 +21,7 @@
 
 #include <memory>
 
+#include "atomic.h"
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "heap_bitmap.h"
@@ -43,17 +44,17 @@
   const uintptr_t offset = addr - heap_begin_;
   const size_t index = OffsetToIndex(offset);
   const uword mask = OffsetToMask(offset);
-  uword* const address = &bitmap_begin_[index];
+  Atomic<uword>* atomic_entry = reinterpret_cast<Atomic<uword>*>(&bitmap_begin_[index]);
   DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
   uword old_word;
   do {
-    old_word = *address;
+    old_word = atomic_entry->LoadRelaxed();
     // Fast path: The bit is already set.
     if ((old_word & mask) != 0) {
       DCHECK(Test(obj));
       return true;
     }
-  } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
+  } while (!atomic_entry->CompareExchangeWeakSequentiallyConsistent(old_word, old_word | mask));
   DCHECK(Test(obj));
   return false;
 }
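
The Set path above is the usual CAS-loop test-and-set over a bitmap word; as a standalone sketch with assumed names:

#include <atomic>
#include <cstdint>

// Returns true if the bit selected by 'mask' was already set, false if this
// call set it.
bool AtomicTestAndSetBit(std::atomic<uintptr_t>* word, uintptr_t mask) {
  uintptr_t old_word = word->load(std::memory_order_relaxed);
  do {
    if ((old_word & mask) != 0) {
      return true;  // Fast path: already set, nothing to write.
    }
    // compare_exchange_weak refreshes old_word on failure, so each retry sees
    // the latest word value.
  } while (!word->compare_exchange_weak(old_word, old_word | mask,
                                        std::memory_order_seq_cst));
  return false;
}
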
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 27fb087..6d1ba87 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -60,17 +60,17 @@
   // <offset> is the difference from .base to a pointer address.
   // <index> is the index of .bits that contains the bit representing
   //         <offset>.
-  static size_t OffsetToIndex(size_t offset) ALWAYS_INLINE {
+  static constexpr size_t OffsetToIndex(size_t offset) {
     return offset / kAlignment / kBitsPerWord;
   }
 
   template<typename T>
-  static T IndexToOffset(T index) {
+  static constexpr T IndexToOffset(T index) {
     return static_cast<T>(index * kAlignment * kBitsPerWord);
   }
 
   // Bits are packed in the obvious way.
-  static uword OffsetToMask(uintptr_t offset) ALWAYS_INLINE {
+  static constexpr uword OffsetToMask(uintptr_t offset) {
     return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerWord);
   }
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 09fb97a..722576f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -159,7 +159,7 @@
     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
       // There is a free page run at the end.
       DCHECK(last_free_page_run->IsFree());
-      DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
       last_free_page_run_size = last_free_page_run->ByteSize(this);
     } else {
       // There is no free page run at the end.
@@ -248,7 +248,7 @@
     // Update the page map.
     size_t page_map_idx = ToPageMapIndex(res);
     for (size_t i = 0; i < num_pages; i++) {
-      DCHECK_EQ(page_map_[page_map_idx + i], kPageMapEmpty);
+      DCHECK(IsFreePage(page_map_idx + i));
     }
     switch (page_map_type) {
     case kPageMapRun:
@@ -301,8 +301,7 @@
     pm_part_type = kPageMapLargeObjectPart;
     break;
   default:
-    pm_part_type = kPageMapEmpty;
-    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+    LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
                << static_cast<int>(pm_type) << ", ptr=" << std::hex
                << reinterpret_cast<intptr_t>(ptr);
     return 0;
@@ -330,7 +329,7 @@
   }
 
   if (kTraceRosAlloc) {
-    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
               << "(" << std::dec << (num_pages * kPageSize) << ")";
   }
@@ -347,7 +346,7 @@
   if (!free_page_runs_.empty()) {
     // Try to coalesce in the higher address direction.
     if (kTraceRosAlloc) {
-      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+      LOG(INFO) << __PRETTY_FUNCTION__ << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
@@ -497,27 +496,27 @@
                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
     }
     switch (page_map_[pm_idx]) {
-      case kPageMapEmpty:
-        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return 0;
       case kPageMapLargeObject:
         return FreePages(self, ptr, false);
       case kPageMapLargeObjectPart:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
         return 0;
-      case kPageMapRun:
       case kPageMapRunPart: {
-        size_t pi = pm_idx;
-        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
         // Find the beginning of the run.
-        while (page_map_[pi] != kPageMapRun) {
-          pi--;
-          DCHECK_LT(pi, capacity_ / kPageSize);
-        }
-        DCHECK_EQ(page_map_[pi], kPageMapRun);
-        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        do {
+          --pm_idx;
+          DCHECK_LT(pm_idx, capacity_ / kPageSize);
+        } while (page_map_[pm_idx] != kPageMapRun);
+        // Fall-through.
+      case kPageMapRun:
+        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
         DCHECK_EQ(run->magic_num_, kMagicNum);
         break;
+      case kPageMapReleased:
+        // Fall-through.
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return 0;
       }
       default:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
@@ -594,7 +593,8 @@
     if (kIsDebugBuild && current_run != dedicated_full_run_) {
       full_runs_[idx].insert(current_run);
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(current_run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
@@ -1358,6 +1358,8 @@
   for (size_t i = 0; i < end; ++i) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
@@ -1370,8 +1372,8 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          stream << "[" << i << "]=Empty (FPR start)"
-                 << " fpr_size=" << curr_fpr_size
+          stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
+                 << " (FPR start) fpr_size=" << curr_fpr_size
                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
@@ -1441,43 +1443,46 @@
   size_t pm_idx = RoundDownToPageMapIndex(ptr);
   MutexLock mu(Thread::Current(), lock_);
   switch (page_map_[pm_idx]) {
-  case kPageMapEmpty:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapLargeObject: {
-    size_t num_pages = 1;
-    size_t idx = pm_idx + 1;
-    size_t end = page_map_size_;
-    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
-      num_pages++;
-      idx++;
+    case kPageMapReleased:
+      // Fall-through.
+    case kPageMapEmpty:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapLargeObject: {
+      size_t num_pages = 1;
+      size_t idx = pm_idx + 1;
+      size_t end = page_map_size_;
+      while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+        num_pages++;
+        idx++;
+      }
+      return num_pages * kPageSize;
     }
-    return num_pages * kPageSize;
-  }
-  case kPageMapLargeObjectPart:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapRun:
-  case kPageMapRunPart: {
-    // Find the beginning of the run.
-    while (page_map_[pm_idx] != kPageMapRun) {
-      pm_idx--;
-      DCHECK_LT(pm_idx, capacity_ / kPageSize);
+    case kPageMapLargeObjectPart:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapRun:
+    case kPageMapRunPart: {
+      // Find the beginning of the run.
+      while (page_map_[pm_idx] != kPageMapRun) {
+        pm_idx--;
+        DCHECK_LT(pm_idx, capacity_ / kPageSize);
+      }
+      DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
+      Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+      DCHECK_EQ(run->magic_num_, kMagicNum);
+      size_t idx = run->size_bracket_idx_;
+      size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+          - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+      DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+      return IndexToBracketSize(idx);
     }
-    DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
-    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
-    DCHECK_EQ(run->magic_num_, kMagicNum);
-    size_t idx = run->size_bracket_idx_;
-    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
-        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
-    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
-    return IndexToBracketSize(idx);
-  }
-  default:
-    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-    break;
+    default: {
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+      break;
+    }
   }
   return 0;
 }
@@ -1490,7 +1495,7 @@
   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
     // Remove the last free page run, if any.
     DCHECK(last_free_page_run->IsFree());
-    DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+    DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
     free_page_runs_.erase(last_free_page_run);
@@ -1500,7 +1505,7 @@
     size_t new_num_of_pages = new_footprint / kPageSize;
     DCHECK_GE(page_map_size_, new_num_of_pages);
     // Zero out the tail of the page map.
-    byte* zero_begin = page_map_ + new_num_of_pages;
+    byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
     byte* madvise_begin = AlignUp(zero_begin, kPageSize);
     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
@@ -1543,6 +1548,8 @@
   while (i < pm_end) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         // The start of a free page run.
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1560,7 +1567,7 @@
         size_t num_pages = fpr_size / kPageSize;
         if (kIsDebugBuild) {
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+            DCHECK(IsFreePage(j));
           }
         }
         i += fpr_size / kPageSize;
@@ -1672,7 +1679,7 @@
       full_runs_[idx].insert(run);
       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__  << " : Inserted run 0x" << std::hex
+        LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
                   << reinterpret_cast<intptr_t>(run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
@@ -1685,7 +1692,7 @@
     non_full_runs_[idx].insert(run);
     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
     if (kTraceRosAlloc) {
-      LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                 << reinterpret_cast<intptr_t>(run)
                 << " into non_full_runs_[" << std::dec << idx << "]";
     }
@@ -1865,7 +1872,7 @@
 void RosAlloc::Verify() {
   Thread* self = Thread::Current();
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
-      << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+      << "The mutator locks isn't exclusively locked at " << __PRETTY_FUNCTION__;
   MutexLock mu(self, *Locks::thread_list_lock_);
   ReaderMutexLock wmu(self, bulk_free_lock_);
   std::vector<Run*> runs;
@@ -1876,6 +1883,8 @@
     while (i < pm_end) {
       byte pm = page_map_[i];
       switch (pm) {
+        case kPageMapReleased:
+          // Fall-through.
         case kPageMapEmpty: {
           // The start of a free page run.
           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1889,7 +1898,7 @@
           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
               << "A free page run size must be > 0 : " << fpr_size;
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            CHECK_EQ(page_map_[j], kPageMapEmpty)
+            CHECK(IsFreePage(j))
                 << "A mismatch between the page map table for kPageMapEmpty "
                 << " at page index " << j
                 << " and the free page run size : page index range : "
@@ -2097,48 +2106,36 @@
   Thread* self = Thread::Current();
   size_t reclaimed_bytes = 0;
   size_t i = 0;
-  while (true) {
-    MutexLock mu(self, lock_);
-    // Check the page map size which might have changed due to grow/shrink.
-    size_t pm_end = page_map_size_;
-    if (i >= pm_end) {
-      // Reached the end.
-      break;
-    }
+  // Check the page map size which might have changed due to grow/shrink.
+  while (i < page_map_size_) {
+    // Reading the page map without a lock is racy but the race is benign since it should only
+    // result in occasionally not releasing pages which we could release.
     byte pm = page_map_[i];
     switch (pm) {
       case kPageMapEmpty: {
-        // The start of a free page run. Release pages.
-        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
-        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
-        size_t fpr_size = fpr->ByteSize(this);
-        DCHECK(IsAligned<kPageSize>(fpr_size));
-        byte* start = reinterpret_cast<byte*>(fpr);
-        if (kIsDebugBuild) {
-          // In the debug build, the first page of a free page run
-          // contains a magic number for debugging. Exclude it.
-          start = reinterpret_cast<byte*>(fpr) + kPageSize;
+        // Only lock if we have an empty page since we want to prevent other threads racing in.
+        MutexLock mu(self, lock_);
+        // Check that it's still empty after we acquired the lock since another thread could have
+        // raced in and placed an allocation here.
+        pm = page_map_[i];
+        if (LIKELY(pm == kPageMapEmpty)) {
+          // The start of a free page run. Release pages.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+          size_t fpr_size = fpr->ByteSize(this);
+          DCHECK(IsAligned<kPageSize>(fpr_size));
+          byte* start = reinterpret_cast<byte*>(fpr);
+          reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+          i += fpr_size / kPageSize;
+          DCHECK_LE(i, page_map_size_);
         }
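
The dedupe change replaces a find-then-Put double lookup with a single lower_bound plus hinted insert. A generic sketch of that pattern over std::map, with a hypothetical key type and names:

#include <cstdint>
#include <map>
#include <string>
#include <utility>

uint32_t GetOrAssignOffset(std::map<std::string, uint32_t>* dedupe_map,
                           const std::string& key, uint32_t next_offset) {
  auto lb = dedupe_map->lower_bound(key);
  // lower_bound returns the first entry not less than key; equality holds iff
  // key is also not less than that entry, which key_comp() checks cheaply.
  if (lb != dedupe_map->end() && !dedupe_map->key_comp()(key, lb->first)) {
    return lb->second;  // Already present: deduplicate.
  }
  // Not present: insert just before the lower bound, reusing it as a hint so
  // the tree is not searched a second time.
  dedupe_map->insert(lb, std::make_pair(key, next_offset));
  return next_offset;
}
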
-        byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
-        if (!kMadviseZeroes) {
-          memset(start, 0, end - start);
-        }
-        CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
-        reclaimed_bytes += fpr_size;
-        size_t num_pages = fpr_size / kPageSize;
-        if (kIsDebugBuild) {
-          for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
-          }
-        }
-        i += num_pages;
-        DCHECK_LE(i, pm_end);
         break;
       }
       case kPageMapLargeObject:      // Fall through.
       case kPageMapLargeObjectPart:  // Fall through.
       case kPageMapRun:              // Fall through.
       case kPageMapRunPart:          // Fall through.
+      case kPageMapReleased:         // Fall through since it is already released.
         ++i;
         break;  // Skip.
       default:
@@ -2149,6 +2146,35 @@
   return reclaimed_bytes;
 }
 
+size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
+  DCHECK_ALIGNED(start, kPageSize);
+  DCHECK_ALIGNED(end, kPageSize);
+  DCHECK_LT(start, end);
+  if (kIsDebugBuild) {
+    // In the debug build, the first page of a free page run
+    // contains a magic number for debugging. Exclude it.
+    start += kPageSize;
+  }
+  if (!kMadviseZeroes) {
+    // TODO: Do this when we resurrect the page instead.
+    memset(start, 0, end - start);
+  }
+  CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
+  size_t pm_idx = ToPageMapIndex(start);
+  size_t reclaimed_bytes = 0;
+  // Calculate reclaimed bytes and update the page map.
+  const size_t max_idx = pm_idx + (end - start) / kPageSize;
+  for (; pm_idx < max_idx; ++pm_idx) {
+    DCHECK(IsFreePage(pm_idx));
+    if (page_map_[pm_idx] == kPageMapEmpty) {
+      // Mark the page as released and update how many bytes we released.
+      reclaimed_bytes += kPageSize;
+      page_map_[pm_idx] = kPageMapReleased;
+    }
+  }
+  return reclaimed_bytes;
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
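
ReleasePageRange centralises the madvise-and-bookkeeping step that used to be duplicated. A Linux-only sketch of the same idea, with an assumed page-map layout and illustrative names:

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <vector>

enum PageState : uint8_t { kEmpty, kReleased };

size_t ReleaseRange(uint8_t* start, uint8_t* end, std::vector<PageState>* page_map,
                    size_t first_page_index, size_t page_size) {
  size_t reclaimed = 0;
  // Hand the range back to the kernel; the return value is deliberately
  // unchecked in this sketch.
  madvise(start, static_cast<size_t>(end - start), MADV_DONTNEED);
  const size_t num_pages = static_cast<size_t>(end - start) / page_size;
  for (size_t i = 0; i < num_pages; ++i) {
    PageState& state = (*page_map)[first_page_index + i];
    if (state == kEmpty) {
      state = kReleased;       // Remember the page is already given back.
      reclaimed += page_size;  // Only count pages not previously released.
    }
  }
  return reclaimed;
}
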
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 13f61ec..fad0dc8 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -99,27 +99,8 @@
       byte* start = reinterpret_cast<byte*>(this);
       size_t byte_size = ByteSize(rosalloc);
       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
-      bool release_pages = ShouldReleasePages(rosalloc);
-      if (kIsDebugBuild) {
-        // Exclude the first page that stores the magic number.
-        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
-        start += kPageSize;
-        byte_size -= kPageSize;
-        if (byte_size > 0) {
-          if (release_pages) {
-            if (!kMadviseZeroes) {
-              memset(start, 0, byte_size);
-            }
-            madvise(start, byte_size, MADV_DONTNEED);
-          }
-        }
-      } else {
-        if (release_pages) {
-          if (!kMadviseZeroes) {
-            memset(start, 0, byte_size);
-          }
-          madvise(start, byte_size, MADV_DONTNEED);
-        }
+      if (ShouldReleasePages(rosalloc)) {
+        rosalloc->ReleasePageRange(start, start + byte_size);
       }
     }
   };
@@ -462,14 +443,15 @@
   std::string size_bracket_lock_names[kNumOfSizeBrackets];
   // The types of page map entries.
   enum {
-    kPageMapEmpty           = 0,  // Not allocated.
-    kPageMapRun             = 1,  // The beginning of a run.
-    kPageMapRunPart         = 2,  // The non-beginning part of a run.
-    kPageMapLargeObject     = 3,  // The beginning of a large object.
-    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+    kPageMapReleased = 0,     // Zero and released back to the OS.
+    kPageMapEmpty,            // Zero but probably dirty.
+    kPageMapRun,              // The beginning of a run.
+    kPageMapRunPart,          // The non-beginning part of a run.
+    kPageMapLargeObject,      // The beginning of a large object.
+    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
   };
   // The table that indicates what pages are currently used for.
-  byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
+  volatile byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
   size_t page_map_size_;
   size_t max_page_map_size_;
   std::unique_ptr<MemMap> page_map_mem_map_;
@@ -536,6 +518,9 @@
   // Revoke the current runs which share an index with the thread local runs.
   void RevokeThreadUnsafeCurrentRuns();
 
+  // Release a range of pages.
+  size_t ReleasePageRange(byte* start, byte* end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
@@ -588,6 +573,11 @@
   static Run* GetDedicatedFullRun() {
     return dedicated_full_run_;
   }
+  bool IsFreePage(size_t idx) const {
+    DCHECK_LT(idx, capacity_ / kPageSize);
+    byte pm_type = page_map_[idx];
+    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
+  }
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 696728b..e9adca0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,7 +114,7 @@
       desired_collector_type_(foreground_collector_type_),
       heap_trim_request_lock_(nullptr),
       last_trim_time_(0),
-      heap_transition_target_time_(0),
+      heap_transition_or_trim_target_time_(0),
       heap_trim_request_pending_(false),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
@@ -850,10 +850,10 @@
       MutexLock mu(self, *heap_trim_request_lock_);
       desired_collector_type = desired_collector_type_;
       uint64_t current_time = NanoTime();
-      if (current_time >= heap_transition_target_time_) {
+      if (current_time >= heap_transition_or_trim_target_time_) {
         break;
       }
-      wait_time = heap_transition_target_time_ - current_time;
+      wait_time = heap_transition_or_trim_target_time_ - current_time;
     }
     ScopedThreadStateChange tsc(self, kSleeping);
     usleep(wait_time / 1000);  // Usleep takes microseconds.
@@ -871,9 +871,9 @@
     VLOG(heap) << "Deflating " << count << " monitors took "
         << PrettyDuration(NanoTime() - start_time);
     runtime->GetThreadList()->ResumeAll();
-    // Do a heap trim if it is needed.
-    Trim();
   }
+  // Do a heap trim if it is needed.
+  Trim();
 }
 
 void Heap::Trim() {
@@ -904,9 +904,13 @@
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
-      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
-      total_alloc_space_size += alloc_space->Size();
-      managed_reclaimed += alloc_space->Trim();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+        // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+        // for a long period of time.
+        managed_reclaimed += malloc_space->Trim();
+      }
+      total_alloc_space_size += malloc_space->Size();
     }
   }
   total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
@@ -919,15 +923,18 @@
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
   size_t native_reclaimed = 0;
+  // Only trim the native heap if we don't care about pauses.
+  if (!CareAboutPauseTimes()) {
 #if defined(USE_DLMALLOC)
-  // Trim the native heap.
-  dlmalloc_trim(0);
-  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+    // Trim the native heap.
+    dlmalloc_trim(0);
+    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
 #elif defined(USE_JEMALLOC)
-  // Jemalloc does it's own internal trimming.
+    // Jemalloc does its own internal trimming.
 #else
-  UNIMPLEMENTED(WARNING) << "Add trimming support";
+    UNIMPLEMENTED(WARNING) << "Add trimming support";
 #endif
+  }
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
@@ -2693,17 +2700,14 @@
     if (desired_collector_type_ == desired_collector_type) {
       return;
     }
-    heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+    heap_transition_or_trim_target_time_ =
+        std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time);
     desired_collector_type_ = desired_collector_type;
   }
   SignalHeapTrimDaemon(self);
 }
 
 void Heap::RequestHeapTrim() {
-  // Request a heap trim only if we do not currently care about pause times.
-  if (CareAboutPauseTimes()) {
-    return;
-  }
   // GC completed and now we must decide whether to request a heap trim (advising pages back to the
   // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
   // a space it will hold its lock and can become a cause of jank.
@@ -2733,6 +2737,10 @@
       return;
     }
     heap_trim_request_pending_ = true;
+    uint64_t current_time = NanoTime();
+    if (heap_transition_or_trim_target_time_ < current_time) {
+      heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait;
+    }
   }
   // Notify the daemon thread which will actually do the heap trim.
   SignalHeapTrimDaemon(self);
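
The heap.cc hunks above fold the trim deadline into the shared heap_transition_or_trim_target_time_ and have the daemon sleep until that target before trimming. A minimal standard-C++ sketch of the wait-until-deadline pattern, with illustrative names rather than ART code:

    #include <algorithm>
    #include <chrono>
    #include <mutex>
    #include <thread>

    // Sketch: deferred work that must not run before a shared target time.
    class DeferredWorker {
     public:
      // Request the work to run no earlier than now + delay; later requests can
      // only push the deadline out, mirroring the std::max in the patch.
      void Request(std::chrono::nanoseconds delay) {
        std::lock_guard<std::mutex> lock(mu_);
        target_time_ = std::max(target_time_, Clock::now() + delay);
        pending_ = true;
      }

      // Daemon side: sleep until the deadline has passed, then do the work.
      void RunOnce() {
        for (;;) {
          Clock::time_point target;
          {
            std::lock_guard<std::mutex> lock(mu_);
            if (!pending_) return;
            target = target_time_;
          }
          Clock::time_point now = Clock::now();
          if (now >= target) break;
          std::this_thread::sleep_for(target - now);
        }
        std::lock_guard<std::mutex> lock(mu_);
        pending_ = false;
        // ... perform the transition and/or trim here ...
      }

     private:
      using Clock = std::chrono::steady_clock;
      std::mutex mu_;
      Clock::time_point target_time_{};
      bool pending_ = false;
    };
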
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6d70a38..c9ea03e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -769,8 +769,8 @@
   Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // When we want to perform the next heap trim (nano seconds).
   uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_);
-  // When we want to perform the next heap transition (nano seconds).
-  uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+  // When we want to perform the next heap transition or heap trim (in nanoseconds).
+  uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
   // If we have a heap trim request pending.
   bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);
 
@@ -981,6 +981,7 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
+  friend class ScopedHeapFill;
   friend class ScopedHeapLock;
   friend class space::SpaceTest;
 
@@ -997,6 +998,25 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
 
+// ScopedHeapFill changes the bytes allocated counter to be equal to the growth limit. This
+// causes the next allocation to perform a GC and possibly an OOM. It can be used to ensure that a
+// GC happens in specific methods such as ThrowIllegalMonitorStateExceptionF in Monitor::Wait.
+class ScopedHeapFill {
+ public:
+  explicit ScopedHeapFill(Heap* heap)
+      : heap_(heap),
+        delta_(heap_->GetMaxMemory() - heap_->GetBytesAllocated()) {
+    heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(delta_);
+  }
+  ~ScopedHeapFill() {
+    heap_->num_bytes_allocated_.FetchAndSubSequentiallyConsistent(delta_);
+  }
+
+ private:
+  Heap* const heap_;
+  const int64_t delta_;
+};
+
 }  // namespace gc
 }  // namespace art
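
ScopedHeapFill above is a plain RAII guard over the allocation counter. A self-contained sketch of the same idea, with a hypothetical FakeHeap standing in for the real Heap accessors:

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-in for Heap::GetBytesAllocated()/GetMaxMemory().
    struct FakeHeap {
      std::atomic<int64_t> bytes_allocated{0};
      int64_t max_memory = 64 * 1024 * 1024;
    };

    // RAII guard: on construction, pretend the heap is full; on destruction, undo it.
    class ScopedCounterFill {
     public:
      explicit ScopedCounterFill(FakeHeap* heap)
          : heap_(heap),
            delta_(heap->max_memory - heap->bytes_allocated.load()) {
        heap_->bytes_allocated.fetch_add(delta_);
      }
      ~ScopedCounterFill() {
        heap_->bytes_allocated.fetch_sub(delta_);
      }

     private:
      FakeHeap* const heap_;
      const int64_t delta_;
    };

While such a guard is alive, any code that compares bytes_allocated against max_memory sees a full heap and takes its slow path, which is how a test can force a GC (or an OOME) at a precise point.
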
 
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 71c295e..ee3c979 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -41,11 +41,12 @@
                                                            size_t* usable_size) {
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   num_bytes = RoundUp(num_bytes, kAlignment);
-  if (end_ + num_bytes > growth_end_) {
+  byte* end = end_.LoadRelaxed();
+  if (end + num_bytes > growth_end_) {
     return nullptr;
   }
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(end_);
-  end_ += num_bytes;
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(end);
+  end_.StoreRelaxed(end + num_bytes);
   *bytes_allocated = num_bytes;
   // Use the CAS free versions as an optimization.
   objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
@@ -61,15 +62,13 @@
   byte* old_end;
   byte* new_end;
   do {
-    old_end = end_;
+    old_end = end_.LoadRelaxed();
     new_end = old_end + num_bytes;
     // If there is no more room in the region, we are out of memory.
     if (UNLIKELY(new_end > growth_end_)) {
       return nullptr;
     }
-  } while (!__sync_bool_compare_and_swap(reinterpret_cast<volatile intptr_t*>(&end_),
-                                         reinterpret_cast<intptr_t>(old_end),
-                                         reinterpret_cast<intptr_t>(new_end)));
+  } while (!end_.CompareExchangeWeakSequentiallyConsistent(old_end, new_end));
   return reinterpret_cast<mirror::Object*>(old_end);
 }
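
The hunk above replaces the __sync CAS on end_ with Atomic<byte*>::CompareExchangeWeakSequentiallyConsistent inside a retry loop. The same bump-pointer allocation scheme in portable C++ with std::atomic, for illustration only:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Lock-free bump-pointer allocation over a fixed byte range.
    class BumpRegion {
     public:
      BumpRegion(uint8_t* begin, uint8_t* limit) : end_(begin), limit_(limit) {}

      void* Alloc(size_t num_bytes) {
        uint8_t* old_end = end_.load(std::memory_order_relaxed);
        uint8_t* new_end;
        do {
          new_end = old_end + num_bytes;
          if (new_end > limit_) {
            return nullptr;  // Region exhausted.
          }
          // On failure, old_end is refreshed with the current value and we retry;
          // a weak CAS is fine here because the loop absorbs spurious failures.
        } while (!end_.compare_exchange_weak(old_end, new_end));
        return old_end;
      }

     private:
      std::atomic<uint8_t*> end_;
      uint8_t* const limit_;
    };
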
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 41a0458..5123e47 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -293,7 +293,7 @@
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   live_bitmap_->Clear();
   mark_bitmap_->Clear();
-  end_ = Begin() + starting_size_;
+  SetEnd(Begin() + starting_size_);
   mspace_ = CreateMspace(mem_map_->Begin(), starting_size_, initial_size_);
   SetFootprintLimit(footprint_limit);
 }
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 4d74f3c..27f92b5 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -123,13 +123,13 @@
   growth_limit = RoundUp(growth_limit, kPageSize);
   growth_limit_ = growth_limit;
   if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
+    SetEnd(begin_ + growth_limit);
   }
 }
 
 void* MallocSpace::MoreCore(intptr_t increment) {
   CheckMoreCoreForPrecondition();
-  byte* original_end = end_;
+  byte* original_end = End();
   if (increment != 0) {
     VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
     byte* new_end = original_end + increment;
@@ -151,8 +151,8 @@
       CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
       CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
     }
-    // Update end_
-    end_ = new_end;
+    // Update end_.
+    SetEnd(new_end);
   }
   return original_end;
 }
@@ -163,11 +163,11 @@
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
   RevokeAllThreadLocalBuffers();
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  SetEnd(reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(End()), kPageSize)));
   DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(End()));
   DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
+  DCHECK(IsAligned<kPageSize>(End()));
   size_t size = RoundUp(Size(), kPageSize);
   // Trimming the heap should be done by the caller since we may have invalidated the accounting
   // stored in between objects.
@@ -175,7 +175,7 @@
   const size_t growth_limit = growth_limit_ - size;
   const size_t capacity = Capacity() - size;
   VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "End " << reinterpret_cast<const void*>(End()) << "\n"
              << "Size " << size << "\n"
              << "GrowthLimit " << growth_limit_ << "\n"
              << "Capacity " << Capacity();
@@ -188,16 +188,17 @@
   VLOG(heap) << "Capacity " << PrettySize(capacity);
   // Remap the tail.
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name,
+                                                          PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
-  void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
+  void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity,
+                                    low_memory_mode);
   // Protect memory beyond the initial size.
   byte* end = mem_map->Begin() + starting_size_;
   if (capacity > initial_size_) {
     CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size_, PROT_NONE), alloc_space_name);
   }
-  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, End(), end,
                                      limit_, growth_limit, CanMoveObjects());
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index a1511e7..5738d47 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -349,7 +349,7 @@
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   live_bitmap_->Clear();
   mark_bitmap_->Clear();
-  end_ = begin_ + starting_size_;
+  SetEnd(begin_ + starting_size_);
   delete rosalloc_;
   rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(),
                              low_memory_mode_);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 8444a70..fff4df1 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <string>
 
+#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
@@ -249,7 +250,7 @@
 
   // Current address at which the space ends, which may vary as the space is filled.
   byte* End() const {
-    return end_;
+    return end_.LoadRelaxed();
   }
 
   // The end of the address range covered by the space.
@@ -260,7 +261,7 @@
   // Change the end of the space. Be careful with use since changing the end of a space to an
   // invalid value may break the GC.
   void SetEnd(byte* end) {
-    end_ = end;
+    end_.StoreRelaxed(end);
   }
 
   void SetLimit(byte* limit) {
@@ -307,7 +308,7 @@
   byte* begin_;
 
   // Current end of the space.
-  byte* volatile end_;
+  Atomic<byte*> end_;
 
   // Limit of the space.
   byte* limit_;
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8f5da83..f459b59 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -137,7 +137,8 @@
       new_quick_code = GetQuickResolutionTrampoline(class_linker);
     }
   } else {  // !uninstall
-    if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
+    if ((interpreter_stubs_installed_ || forced_interpret_only_ || IsDeoptimized(method)) &&
+        !method->IsNative()) {
       new_portable_code = GetPortableToInterpreterBridge();
       new_quick_code = GetQuickToInterpreterBridge();
     } else {
@@ -150,7 +151,9 @@
         new_quick_code = class_linker->GetQuickOatCodeFor(method);
         DCHECK(new_quick_code != GetQuickToInterpreterBridgeTrampoline(class_linker));
         if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) {
-          DCHECK(new_portable_code != GetPortableToInterpreterBridge());
+          // TODO: portable to quick bridge. Bug: 8196384. We cannot enable the check below as long
+          // as GetPortableToQuickBridge() == GetPortableToInterpreterBridge().
+          // DCHECK(new_portable_code != GetPortableToInterpreterBridge());
           new_portable_code = GetPortableToInterpreterBridge();
           new_quick_code = GetQuickInstrumentationEntryPoint();
         }
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index cb4d444..729444e 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -95,9 +95,11 @@
     jint newValue = args[4];
     bool success;
     if (Runtime::Current()->IsActiveTransaction()) {
-      success = obj->CasField32<true>(MemberOffset(offset), expectedValue, newValue);
+      success = obj->CasFieldWeakSequentiallyConsistent32<true>(MemberOffset(offset),
+                                                                expectedValue, newValue);
     } else {
-      success = obj->CasField32<false>(MemberOffset(offset), expectedValue, newValue);
+      success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset),
+                                                                 expectedValue, newValue);
     }
     result->SetZ(success ? JNI_TRUE : JNI_FALSE);
   } else if (name == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index c7fb884..9f04b90 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -772,8 +772,13 @@
     // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(
         self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
-    CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
-        << PrettyDescriptor(descriptor);
+    if (found == NULL) {
+      if (!self->IsExceptionPending()) {
+        AbortTransaction(self, "Class.forName failed in un-started runtime for class: %s",
+                         PrettyDescriptor(descriptor).c_str());
+      }
+      return;
+    }
     result->SetL(found);
   } else if (name == "java.lang.Class java.lang.Void.lookupType()") {
     result->SetL(Runtime::Current()->GetClassLinker()->FindPrimitiveClass('V'));
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 62c1162..089ef57 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -69,10 +69,10 @@
   }
 }
 
-inline bool Object::CasLockWord(LockWord old_val, LockWord new_val) {
+inline bool Object::CasLockWordWeakSequentiallyConsistent(LockWord old_val, LockWord new_val) {
   // Force use of non-transactional mode and do not check.
-  return CasField32<false, false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(),
-                                  new_val.GetValue());
+  return CasFieldWeakSequentiallyConsistent32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
 inline uint32_t Object::GetLockOwnerThreadId() {
@@ -131,21 +131,17 @@
   DCHECK(kUseBakerOrBrooksReadBarrier);
   MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
   byte* raw_addr = reinterpret_cast<byte*>(this) + offset.SizeValue();
-  HeapReference<Object>* ref = reinterpret_cast<HeapReference<Object>*>(raw_addr);
+  Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
   HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_rb_ptr));
   HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(rb_ptr));
-  uint32_t expected_val = expected_ref.reference_;
-  uint32_t new_val;
   do {
-    uint32_t old_val = ref->reference_;
-    if (old_val != expected_val) {
+    if (UNLIKELY(atomic_rb_ptr->LoadRelaxed() != expected_ref.reference_)) {
       // Lost the race.
       return false;
     }
-    new_val = new_ref.reference_;
-  } while (!__sync_bool_compare_and_swap(
-      reinterpret_cast<uint32_t*>(raw_addr), expected_val, new_val));
-  DCHECK_EQ(new_val, ref->reference_);
+  } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_,
+                                                                     new_ref.reference_));
+  DCHECK_EQ(new_ref.reference_, atomic_rb_ptr->LoadRelaxed());
   return true;
 #else
   LOG(FATAL) << "Unreachable";
@@ -448,7 +444,8 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) {
+inline bool Object::CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset,
+                                                         int32_t old_value, int32_t new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -459,9 +456,9 @@
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
 
-  return __sync_bool_compare_and_swap(addr, old_value, new_value);
+  return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value);
 }
 
 template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
@@ -513,7 +510,8 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value) {
+inline bool Object::CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset,
+                                                         int64_t old_value, int64_t new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -524,8 +522,8 @@
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile int64_t* addr = reinterpret_cast<volatile int64_t*>(raw_addr);
-  return QuasiAtomic::Cas64(old_value, new_value, addr);
+  Atomic<int64_t>* atomic_addr = reinterpret_cast<Atomic<int64_t>*>(raw_addr);
+  return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value);
 }
 
 template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption,
@@ -615,8 +613,8 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value,
-                                   Object* new_value) {
+inline bool Object::CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset,
+                                                             Object* old_value, Object* new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -632,11 +630,14 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
   HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
   HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
-  bool success =  __sync_bool_compare_and_swap(addr, old_ref.reference_, new_ref.reference_);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_,
+                                                                        new_ref.reference_);
+
   if (success) {
     Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
   }
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 422a88b..e58091f 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -156,7 +156,7 @@
         // loop iteration.
         LockWord hash_word(LockWord::FromHashCode(GenerateIdentityHashCode()));
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
-        if (const_cast<Object*>(this)->CasLockWord(lw, hash_word)) {
+        if (const_cast<Object*>(this)->CasLockWordWeakSequentiallyConsistent(lw, hash_word)) {
           return hash_word.GetHashCode();
         }
         break;
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index c082443..d29011a 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -110,7 +110,8 @@
   // have C++11 "strong" semantics.
   // TODO: In most, possibly all, cases, these assumptions are too strong.
   // Confirm and weaken the implementation.
-  bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CasLockWordWeakSequentiallyConsistent(LockWord old_val, LockWord new_val)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
   mirror::Object* MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -226,7 +227,8 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value)
+  bool CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
+                                                Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -252,7 +254,8 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) ALWAYS_INLINE
+  bool CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value,
+                                            int32_t new_value) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
@@ -275,7 +278,8 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value)
+  bool CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset, int64_t old_value,
+                                            int64_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index eb62a69..5633a77 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -90,7 +90,33 @@
       hash_code_(hash_code),
       locking_method_(NULL),
       locking_dex_pc_(0),
-      monitor_id_(MonitorPool::CreateMonitorId(self, this)) {
+      monitor_id_(MonitorPool::ComputeMonitorId(this, self)) {
+#ifdef __LP64__
+  DCHECK(false) << "Should not be reached in 64b";
+  next_free_ = nullptr;
+#endif
+  // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
+  // with the owner unlocking the thin-lock.
+  CHECK(owner == nullptr || owner == self || owner->IsSuspended());
+  // The identity hash code is set for the lifetime of the monitor.
+}
+
+Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code,
+                 MonitorId id)
+    : monitor_lock_("a monitor lock", kMonitorLock),
+      monitor_contenders_("monitor contenders", monitor_lock_),
+      num_waiters_(0),
+      owner_(owner),
+      lock_count_(0),
+      obj_(obj),
+      wait_set_(NULL),
+      hash_code_(hash_code),
+      locking_method_(NULL),
+      locking_dex_pc_(0),
+      monitor_id_(id) {
+#ifdef __LP64__
+  next_free_ = nullptr;
+#endif
   // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
   // with the owner unlocking the thin-lock.
   CHECK(owner == nullptr || owner == self || owner->IsSuspended());
@@ -137,7 +163,7 @@
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
-  bool success = GetObject()->CasLockWord(lw, fat);
+  bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
   // Lock profiling.
   if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
     locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
@@ -146,7 +172,6 @@
 }
 
 Monitor::~Monitor() {
-  MonitorPool::ReleaseMonitorId(monitor_id_);
   // Deflated monitors have a null object.
 }
 
@@ -621,20 +646,23 @@
  * inflating the lock and so the caller should read the monitor following the call.
  */
 void Monitor::Inflate(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code) {
-  DCHECK(self != NULL);
-  DCHECK(obj != NULL);
+  DCHECK(self != nullptr);
+  DCHECK(obj != nullptr);
   // Allocate and acquire a new monitor.
-  std::unique_ptr<Monitor> m(new Monitor(self, owner, obj, hash_code));
+  Monitor* m = MonitorPool::CreateMonitor(self, owner, obj, hash_code);
+  DCHECK(m != nullptr);
   if (m->Install(self)) {
     if (owner != nullptr) {
       VLOG(monitor) << "monitor: thread" << owner->GetThreadId()
-          << " created monitor " << m.get() << " for object " << obj;
+          << " created monitor " << m << " for object " << obj;
     } else {
       VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
-          << " created monitor " << m.get() << " for object " << obj;
+          << " created monitor " << m << " for object " << obj;
     }
-    Runtime::Current()->GetMonitorList()->Add(m.release());
+    Runtime::Current()->GetMonitorList()->Add(m);
     CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
+  } else {
+    MonitorPool::ReleaseMonitor(self, m);
   }
 }
 
@@ -694,7 +722,7 @@
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
-        if (h_obj->CasLockWord(lock_word, thin_locked)) {
+        if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
           // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
         }
@@ -1071,8 +1099,12 @@
 }
 
 MonitorList::~MonitorList() {
-  MutexLock mu(Thread::Current(), monitor_list_lock_);
-  STLDeleteElements(&list_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, monitor_list_lock_);
+  // Release all monitors to the pool.
+  // TODO: Is it an invariant that *all* open monitors are in the list? Then we could
+  // clear faster in the pool.
+  MonitorPool::ReleaseMonitors(self, &list_);
 }
 
 void MonitorList::DisallowNewMonitors() {
@@ -1097,7 +1129,8 @@
 }
 
 void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
-  MutexLock mu(Thread::Current(), monitor_list_lock_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
     // Disable the read barrier in GetObject() as this is called by GC.
@@ -1107,7 +1140,7 @@
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << obj;
-      delete m;
+      MonitorPool::ReleaseMonitor(self, m);
       it = list_.erase(it);
     } else {
       m->SetObject(new_obj);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index d7552a3..0d0ad0b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -124,7 +124,9 @@
 
  private:
   explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code,
+                   MonitorId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Install the monitor into its object, may fail if another thread installs a different monitor
   // first.
@@ -212,8 +214,14 @@
   // The denser encoded version of this monitor as stored in the lock word.
   MonitorId monitor_id_;
 
+#ifdef __LP64__
+  // Free list for monitor pool.
+  Monitor* next_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+#endif
+
   friend class MonitorInfo;
   friend class MonitorList;
+  friend class MonitorPool;
   friend class mirror::Object;
   DISALLOW_COPY_AND_ASSIGN(Monitor);
 };
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index eb7525a..440a6be 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -23,36 +23,118 @@
 
 namespace art {
 
-MonitorPool::MonitorPool() : allocated_ids_lock_("allocated monitor ids lock",
-                                                 LockLevel::kMonitorPoolLock) {
+namespace mirror {
+  class Object;
+}  // namespace mirror
+
+MonitorPool::MonitorPool()
+    : num_chunks_(0), capacity_(0), first_free_(nullptr) {
+  AllocateChunk();  // Get our first chunk.
 }
 
-Monitor* MonitorPool::LookupMonitorFromTable(MonitorId mon_id) {
-  ReaderMutexLock mu(Thread::Current(), allocated_ids_lock_);
-  return table_.Get(mon_id);
-}
+// Assumes locks are held appropriately when necessary.
+// We do not need a lock in the constructor, but we do need one in CreateMonitorInPool.
+void MonitorPool::AllocateChunk() {
+  DCHECK(first_free_ == nullptr);
 
-MonitorId MonitorPool::AllocMonitorIdFromTable(Thread* self, Monitor* mon) {
-  WriterMutexLock mu(self, allocated_ids_lock_);
-  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
-    if (!allocated_ids_[i]) {
-      allocated_ids_.set(i);
-      MonitorId mon_id = i + 1;  // Zero is reserved to mean "invalid".
-      table_.Put(mon_id, mon);
-      return mon_id;
+  // Do we need to resize?
+  if (num_chunks_ == capacity_) {
+    if (capacity_ == 0U) {
+      // Initialization.
+      capacity_ = kInitialChunkStorage;
+      uintptr_t* new_backing = new uintptr_t[capacity_];
+      monitor_chunks_.StoreRelaxed(new_backing);
+    } else {
+      size_t new_capacity = 2 * capacity_;
+      uintptr_t* new_backing = new uintptr_t[new_capacity];
+      uintptr_t* old_backing = monitor_chunks_.LoadRelaxed();
+      memcpy(new_backing, old_backing, sizeof(uintptr_t) * capacity_);
+      monitor_chunks_.StoreRelaxed(new_backing);
+      capacity_ = new_capacity;
+      old_chunk_arrays_.push_back(old_backing);
+      LOG(INFO) << "Resizing to capacity " << capacity_;
     }
   }
-  LOG(FATAL) << "Out of internal monitor ids";
-  return 0;
+
+  // Allocate the chunk.
+  void* chunk = malloc(kChunkSize);
+  // Check we allocated memory.
+  CHECK_NE(reinterpret_cast<uintptr_t>(nullptr), reinterpret_cast<uintptr_t>(chunk));
+  // Check it is aligned as we need it.
+  CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment);
+
+  // Add the chunk.
+  *(monitor_chunks_.LoadRelaxed()+num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
+  num_chunks_++;
+
+  // Set up the free list
+  Monitor* last = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(chunk) +
+                                             (kChunkCapacity - 1) * kAlignedMonitorSize);
+  last->next_free_ = nullptr;
+  // Eagerly compute id.
+  last->monitor_id_ = OffsetToMonitorId((num_chunks_ - 1) * kChunkSize +
+                                        (kChunkCapacity - 1) * kAlignedMonitorSize);
+  for (size_t i = 0; i < kChunkCapacity - 1; ++i) {
+    Monitor* before = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(last) -
+                                                 kAlignedMonitorSize);
+    before->next_free_ = last;
+    // Derive monitor_id from last.
+    before->monitor_id_ = OffsetToMonitorId(MonitorIdToOffset(last->monitor_id_) -
+                                            kAlignedMonitorSize);
+
+    last = before;
+  }
+  DCHECK(last == reinterpret_cast<Monitor*>(chunk));
+  first_free_ = last;
 }
 
-void MonitorPool::ReleaseMonitorIdFromTable(MonitorId mon_id) {
-  WriterMutexLock mu(Thread::Current(), allocated_ids_lock_);
-  DCHECK(table_.Get(mon_id) != nullptr);
-  table_.erase(mon_id);
-  --mon_id;  // Zero is reserved to mean "invalid".
-  DCHECK(allocated_ids_[mon_id]) << mon_id;
-  allocated_ids_.reset(mon_id);
+Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj,
+                                          int32_t hash_code)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We are going to allocate, so acquire the writer lock.
+  MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+
+  // Enough space, or need to resize?
+  if (first_free_ == nullptr) {
+    LOG(INFO) << "Allocating a new chunk.";
+    AllocateChunk();
+  }
+
+  Monitor* mon_uninitialized = first_free_;
+  first_free_ = first_free_->next_free_;
+
+  // Pull out the id which was preinitialized.
+  MonitorId id = mon_uninitialized->monitor_id_;
+
+  // Initialize it.
+  Monitor* monitor = new(mon_uninitialized) Monitor(self, owner, obj, hash_code, id);
+
+  return monitor;
+}
+
+void MonitorPool::ReleaseMonitorToPool(Thread* self, Monitor* monitor) {
+  // Might be racy with allocation, so acquire lock.
+  MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+
+  // Keep the monitor id; don't trust the destructor not to clear it.
+  MonitorId id = monitor->monitor_id_;
+
+  // Call the destructor.
+  // TODO: Exception safety?
+  monitor->~Monitor();
+
+  // Add to the head of the free list.
+  monitor->next_free_ = first_free_;
+  first_free_ = monitor;
+
+  // Rewrite monitor id.
+  monitor->monitor_id_ = id;
+}
+
+void MonitorPool::ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors) {
+  for (Monitor* mon : *monitors) {
+    ReleaseMonitorToPool(self, mon);
+  }
 }
 
 }  // namespace art
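
The rewritten pool above carves malloc'd chunks into fixed-size, 8-byte-aligned monitor slots and threads the free ones into an intrusive list headed by first_free_. A generic sketch of that chunked free-list scheme (a hypothetical SlotPool, not the ART API; slot_size must be at least sizeof(Slot) and suitably aligned, as kAlignedMonitorSize guarantees in the patch):

    #include <cstddef>
    #include <cstdlib>
    #include <new>
    #include <vector>

    struct Slot {
      Slot* next_free;  // Intrusive link; only meaningful while the slot is free.
    };

    class SlotPool {
     public:
      SlotPool(size_t slot_size, size_t slots_per_chunk)
          : slot_size_(slot_size), slots_per_chunk_(slots_per_chunk) {}

      // Returns raw storage for one object; grows by a whole chunk when empty.
      void* Allocate() {
        if (first_free_ == nullptr) {
          AllocateChunk();
        }
        Slot* slot = first_free_;
        first_free_ = slot->next_free;
        return slot;
      }

      // Pushes the slot back onto the head of the free list.
      void Release(void* p) {
        first_free_ = new (p) Slot{first_free_};
      }

     private:
      void AllocateChunk() {
        char* chunk = static_cast<char*>(std::malloc(slot_size_ * slots_per_chunk_));
        chunks_.push_back(chunk);  // Chunks are kept for the pool's lifetime.
        for (size_t i = 0; i < slots_per_chunk_; ++i) {
          Release(chunk + i * slot_size_);
        }
      }

      const size_t slot_size_;
      const size_t slots_per_chunk_;
      Slot* first_free_ = nullptr;
      std::vector<char*> chunks_;
    };

Callers pair Allocate() with placement new and Release() with an explicit destructor call, which is exactly how CreateMonitorInPool and ReleaseMonitorToPool use the slots above.
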
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 32e1553..5bc28f1 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -20,11 +20,11 @@
 #include "monitor.h"
 
 #ifdef __LP64__
-#include <bitset>
 #include <stdint.h>
-
+#include "atomic.h"
 #include "runtime.h"
-#include "safe_map.h"
+#else
+#include "base/stl_util.h"     // STLDeleteElements
 #endif
 
 namespace art {
@@ -41,11 +41,36 @@
 #endif
   }
 
+  static Monitor* CreateMonitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#ifndef __LP64__
+    return new Monitor(self, owner, obj, hash_code);
+#else
+    return GetMonitorPool()->CreateMonitorInPool(self, owner, obj, hash_code);
+#endif
+  }
+
+  static void ReleaseMonitor(Thread* self, Monitor* monitor) {
+#ifndef __LP64__
+    delete monitor;
+#else
+    GetMonitorPool()->ReleaseMonitorToPool(self, monitor);
+#endif
+  }
+
+  static void ReleaseMonitors(Thread* self, std::list<Monitor*>* monitors) {
+#ifndef __LP64__
+    STLDeleteElements(monitors);
+#else
+    GetMonitorPool()->ReleaseMonitorsToPool(self, monitors);
+#endif
+  }
+
   static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
 #ifndef __LP64__
     return reinterpret_cast<Monitor*>(mon_id << 3);
 #else
-    return Runtime::Current()->GetMonitorPool()->LookupMonitorFromTable(mon_id);
+    return GetMonitorPool()->LookupMonitor(mon_id);
 #endif
   }
 
@@ -57,39 +82,98 @@
 #endif
   }
 
-  static MonitorId CreateMonitorId(Thread* self, Monitor* mon) {
+  static MonitorId ComputeMonitorId(Monitor* mon, Thread* self) {
 #ifndef __LP64__
-    UNUSED(self);
     return MonitorIdFromMonitor(mon);
 #else
-    return Runtime::Current()->GetMonitorPool()->AllocMonitorIdFromTable(self, mon);
+    return GetMonitorPool()->ComputeMonitorIdInPool(mon, self);
 #endif
   }
 
-  static void ReleaseMonitorId(MonitorId mon_id) {
+  static MonitorPool* GetMonitorPool() {
 #ifndef __LP64__
-    UNUSED(mon_id);
+    return nullptr;
 #else
-    Runtime::Current()->GetMonitorPool()->ReleaseMonitorIdFromTable(mon_id);
+    return Runtime::Current()->GetMonitorPool();
 #endif
   }
 
  private:
 #ifdef __LP64__
-  MonitorPool();
+  // When we create a monitor pool, threads have not been initialized yet, so ignore thread-safety
+  // analysis.
+  MonitorPool() NO_THREAD_SAFETY_ANALYSIS;
 
-  Monitor* LookupMonitorFromTable(MonitorId mon_id);
+  void AllocateChunk() EXCLUSIVE_LOCKS_REQUIRED(Locks::allocated_monitor_ids_lock_);
 
-  MonitorId LookupMonitorIdFromTable(Monitor* mon);
+  Monitor* CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  MonitorId AllocMonitorIdFromTable(Thread* self, Monitor* mon);
+  void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
+  void ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors);
 
-  void ReleaseMonitorIdFromTable(MonitorId mon_id);
+  // Note: This is safe as we do not ever move chunks.
+  Monitor* LookupMonitor(MonitorId mon_id) {
+    size_t offset = MonitorIdToOffset(mon_id);
+    size_t index = offset / kChunkSize;
+    size_t offset_in_chunk = offset % kChunkSize;
+    uintptr_t base = *(monitor_chunks_.LoadRelaxed()+index);
+    return reinterpret_cast<Monitor*>(base + offset_in_chunk);
+  }
 
-  ReaderWriterMutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  static constexpr uint32_t kMaxMonitorId = 0xFFFF;
-  std::bitset<kMaxMonitorId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
-  SafeMap<MonitorId, Monitor*> table_ GUARDED_BY(allocated_ids_lock_);
+  static bool IsInChunk(uintptr_t base_addr, Monitor* mon) {
+    uintptr_t mon_ptr = reinterpret_cast<uintptr_t>(mon);
+    return base_addr <= mon_ptr && (mon_ptr - base_addr < kChunkSize);
+  }
+
+  // Note: This is safe as we do not ever move chunks.
+  MonitorId ComputeMonitorIdInPool(Monitor* mon, Thread* self) {
+    MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+    for (size_t index = 0; index < num_chunks_; ++index) {
+      uintptr_t chunk_addr = *(monitor_chunks_.LoadRelaxed() + index);
+      if (IsInChunk(chunk_addr, mon)) {
+        return OffsetToMonitorId(reinterpret_cast<uintptr_t>(mon) - chunk_addr + index * kChunkSize);
+      }
+    }
+    LOG(FATAL) << "Did not find chunk that contains monitor.";
+    return 0;
+  }
+
+  static size_t MonitorIdToOffset(MonitorId id) {
+    return id << 3;
+  }
+
+  static MonitorId OffsetToMonitorId(size_t offset) {
+    return static_cast<MonitorId>(offset >> 3);
+  }
+
+  // TODO: There are assumptions in the code that monitor addresses are 8B aligned (>>3).
+  static constexpr size_t kMonitorAlignment = 8;
+  // Size of a monitor, rounded up to a multiple of alignment.
+  static constexpr size_t kAlignedMonitorSize = (sizeof(Monitor) + kMonitorAlignment - 1) &
+                                                -kMonitorAlignment;
+  // As close to a page as we can get seems a good start.
+  static constexpr size_t kChunkCapacity = kPageSize / kAlignedMonitorSize;
+  // Chunk size that is referenced in the id. We could shrink this to the storage actually used
+  // in a chunk, i.e., kChunkCapacity * kAlignedMonitorSize, but that would require real divisions.
+  static constexpr size_t kChunkSize = kPageSize;
+  // The number of initial chunks storable in monitor_chunks_. The number is large enough to make
+  // resizing unlikely, but small enough to not waste too much memory.
+  static constexpr size_t kInitialChunkStorage = 8U;
+
+  // List of memory chunks. Each chunk is kChunkSize.
+  Atomic<uintptr_t*> monitor_chunks_;
+  // Number of chunks stored.
+  size_t num_chunks_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // Number of chunks storable.
+  size_t capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+  // To avoid race issues when resizing, we keep all the previous arrays.
+  std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+  // Start of free list of monitors.
+  // Note: these point to the right memory regions, but do *not* denote initialized objects.
+  Monitor* first_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 #endif
 };
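
The id/offset helpers above rely on every monitor slot being 8-byte aligned, so an id is simply the pool-relative byte offset shifted right by three, and looking a monitor up is two shifts plus a chunk-table read. A small sketch of that arithmetic under the same assumptions (the constants are illustrative):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kChunkSize = 4096;  // One page per chunk, as in the patch.
    using MonitorId = uint32_t;

    // Offsets are multiples of 8, so the low three bits are always zero:
    // dividing by 8 keeps ids compact, multiplying by 8 recovers the offset.
    constexpr size_t MonitorIdToOffset(MonitorId id) { return static_cast<size_t>(id) << 3; }
    constexpr MonitorId OffsetToMonitorId(size_t offset) { return static_cast<MonitorId>(offset >> 3); }

    // An id selects a chunk and an offset within that chunk.
    inline uintptr_t IdToAddress(const uintptr_t* chunks, MonitorId id) {
      size_t offset = MonitorIdToOffset(id);
      return chunks[offset / kChunkSize] + (offset % kChunkSize);
    }
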
 
diff --git a/runtime/monitor_pool_test.cc b/runtime/monitor_pool_test.cc
new file mode 100644
index 0000000..cddc245
--- /dev/null
+++ b/runtime/monitor_pool_test.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "monitor_pool.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class MonitorPoolTest : public CommonRuntimeTest {};
+
+class RandGen {
+ public:
+  explicit RandGen(uint32_t seed) : val_(seed) {}
+
+  uint32_t next() {
+    val_ = val_ * 48271 % 2147483647 + 13;
+    return val_;
+  }
+
+  uint32_t val_;
+};
+
+static void VerifyMonitor(Monitor* mon, Thread* self) {
+  // Check whether the monitor id is correct.
+  EXPECT_EQ(MonitorPool::MonitorIdFromMonitor(mon), mon->GetMonitorId());
+  // Check whether the monitor id agrees with the computation.
+  EXPECT_EQ(MonitorPool::ComputeMonitorId(mon, self), mon->GetMonitorId());
+  // Check whether we can use the monitor ID to get the monitor.
+  EXPECT_EQ(mon, MonitorPool::MonitorFromMonitorId(mon->GetMonitorId()));
+}
+
+TEST_F(MonitorPoolTest, MonitorPoolTest) {
+  std::vector<Monitor*> monitors;
+  RandGen r(0x1234);
+
+  // 1) Create and release monitors without increasing the storage.
+
+  // Number of max alive monitors before resize.
+  // Note: for correct testing, make sure this corresponds to the monitor pool's initial size.
+  const size_t kMaxUsage = 28;
+
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Allocate and release monitors.
+  for (size_t i = 0; i < 1000 ; i++) {
+    bool alloc;
+    if (monitors.size() == 0) {
+      alloc = true;
+    } else if (monitors.size() == kMaxUsage) {
+      alloc = false;
+    } else {
+      // Random decision.
+      alloc = r.next() % 2 == 0;
+    }
+
+    if (alloc) {
+      Monitor* mon = MonitorPool::CreateMonitor(self, self, nullptr, static_cast<int32_t>(i));
+      monitors.push_back(mon);
+
+      VerifyMonitor(mon, self);
+    } else {
+      // Release a random monitor.
+      size_t index = r.next() % monitors.size();
+      Monitor* mon = monitors[index];
+      monitors.erase(monitors.begin() + index);
+
+      // Recheck the monitor.
+      VerifyMonitor(mon, self);
+
+      MonitorPool::ReleaseMonitor(self, mon);
+    }
+  }
+
+  // Repeat the grow-and-release cycle several times.
+
+  for (size_t i = 0; i < 10; ++i) {
+    // 2.1) Create enough monitors to require new chunks.
+    size_t target_size = monitors.size() + 2*kMaxUsage;
+    while (monitors.size() < target_size) {
+      Monitor* mon = MonitorPool::CreateMonitor(self, self, nullptr,
+                                                static_cast<int32_t>(-monitors.size()));
+      monitors.push_back(mon);
+
+      VerifyMonitor(mon, self);
+    }
+
+    // 2.2) Verify all monitors.
+    for (Monitor* mon : monitors) {
+      VerifyMonitor(mon, self);
+    }
+
+    // 2.3) Release a number of monitors randomly.
+    for (size_t j = 0; j < kMaxUsage; j++) {
+      // Release a random monitor.
+      size_t index = r.next() % monitors.size();
+      Monitor* mon = monitors[index];
+      monitors.erase(monitors.begin() + index);
+
+      MonitorPool::ReleaseMonitor(self, mon);
+    }
+  }
+
+  // Check and release all remaining monitors.
+  for (Monitor* mon : monitors) {
+    VerifyMonitor(mon, self);
+    MonitorPool::ReleaseMonitor(self, mon);
+  }
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 86db893..bae67f2 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -85,6 +85,7 @@
     case kWaitingForJniOnLoad:            return kJavaWaiting;
     case kWaitingForSignalCatcherOutput:  return kJavaWaiting;
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
+    case kWaitingForMethodTracingStart:   return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index d23cfff..7cc4cac 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -28,7 +28,8 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   // JNI must use non transactional mode.
-  bool success = obj->CasField32<false>(MemberOffset(offset), expectedValue, newValue);
+  bool success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset),
+                                                                  expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -37,7 +38,8 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   // JNI must use non transactional mode.
-  bool success = obj->CasField64<false>(MemberOffset(offset), expectedValue, newValue);
+  bool success = obj->CasFieldWeakSequentiallyConsistent64<false>(MemberOffset(offset),
+                                                                  expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -48,7 +50,8 @@
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   // JNI must use non transactional mode.
-  bool success = obj->CasFieldObject<false>(MemberOffset(offset), expectedValue, newValue);
+  bool success = obj->CasFieldWeakSequentiallyConsistentObject<false>(MemberOffset(offset),
+                                                                      expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3b14aaa..efa205e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -147,6 +147,13 @@
 }
 
 Runtime::~Runtime() {
+  if (method_trace_ && Thread::Current() == nullptr) {
+    // We need a current thread to shutdown method tracing: re-attach it now.
+    JNIEnv* unused_env;
+    if (GetJavaVM()->AttachCurrentThread(&unused_env, nullptr) != JNI_OK) {
+      LOG(ERROR) << "Could not attach current thread before runtime shutdown.";
+    }
+  }
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -681,6 +688,7 @@
   Trace::SetDefaultClockSource(options->profile_clock_source_);
 
   if (options->method_trace_) {
+    ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
     Trace::Start(options->method_trace_file_.c_str(), -1, options->method_trace_file_size_, 0,
                  false, false, 0);
   }
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index bf3a15e..941fd0e 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -34,10 +34,12 @@
 
  public:
   typedef typename ::std::map<K, V, Comparator, Allocator>::key_compare key_compare;
+  typedef typename ::std::map<K, V, Comparator, Allocator>::value_compare value_compare;
   typedef typename ::std::map<K, V, Comparator, Allocator>::allocator_type allocator_type;
   typedef typename ::std::map<K, V, Comparator, Allocator>::iterator iterator;
   typedef typename ::std::map<K, V, Comparator, Allocator>::const_iterator const_iterator;
   typedef typename ::std::map<K, V, Comparator, Allocator>::size_type size_type;
+  typedef typename ::std::map<K, V, Comparator, Allocator>::key_type key_type;
   typedef typename ::std::map<K, V, Comparator, Allocator>::value_type value_type;
 
   SafeMap() = default;
@@ -50,6 +52,9 @@
     return *this;
   }
 
+  key_compare key_comp() const { return map_.key_comp(); }
+  value_compare value_comp() const { return map_.value_comp(); }
+
   iterator begin() { return map_.begin(); }
   const_iterator begin() const { return map_.begin(); }
   iterator end() { return map_.end(); }
@@ -58,8 +63,9 @@
   bool empty() const { return map_.empty(); }
   size_type size() const { return map_.size(); }
 
+  void swap(Self& other) { map_.swap(other.map_); }
   void clear() { map_.clear(); }
-  void erase(iterator it) { map_.erase(it); }
+  iterator erase(iterator it) { return map_.erase(it); }
   size_type erase(const K& k) { return map_.erase(k); }
 
   iterator find(const K& k) { return map_.find(k); }
@@ -78,9 +84,18 @@
   }
 
   // Used to insert a new mapping.
-  void Put(const K& k, const V& v) {
-    std::pair<iterator, bool> result = map_.insert(std::make_pair(k, v));
+  iterator Put(const K& k, const V& v) {
+    std::pair<iterator, bool> result = map_.emplace(k, v);
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
+    return result.first;
+  }
+
+  // Used to insert a new mapping at a known position for better performance.
+  iterator PutBefore(iterator pos, const K& k, const V& v) {
+    // Check that we're using the correct position and the key is not in the map.
+    DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
+    DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
+    return map_.emplace_hint(pos, k, v);
   }
 
   // Used to insert a new mapping or overwrite an existing mapping. Note that if the value type
@@ -102,13 +117,15 @@
   ::std::map<K, V, Comparator, Allocator> map_;
 };
 
-template <typename K, typename V, typename Comparator>
-bool operator==(const SafeMap<K, V, Comparator>& lhs, const SafeMap<K, V, Comparator>& rhs) {
+template <typename K, typename V, typename Comparator, typename Allocator>
+bool operator==(const SafeMap<K, V, Comparator, Allocator>& lhs,
+                const SafeMap<K, V, Comparator, Allocator>& rhs) {
   return lhs.Equals(rhs);
 }
 
-template <typename K, typename V, typename Comparator>
-bool operator!=(const SafeMap<K, V, Comparator>& lhs, const SafeMap<K, V, Comparator>& rhs) {
+template <typename K, typename V, typename Comparator, typename Allocator>
+bool operator!=(const SafeMap<K, V, Comparator, Allocator>& lhs,
+                const SafeMap<K, V, Comparator, Allocator>& rhs) {
   return !(lhs == rhs);
 }
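
PutBefore above forwards a caller-supplied position hint to emplace_hint after DCHECKing that the hint actually brackets the key. A minimal std::map illustration of hinted insertion (not the SafeMap code itself):

    #include <cassert>
    #include <map>
    #include <string>

    // When the caller already knows where the key belongs (typically from an
    // earlier lower_bound), passing that iterator as a hint makes the insert
    // amortized constant time instead of logarithmic.
    void InsertSorted(std::map<int, std::string>* m, int key, const std::string& value) {
      auto pos = m->lower_bound(key);                // First element not less than key.
      assert(pos == m->end() || pos->first != key);  // Key must not already be present.
      m->emplace_hint(pos, key, value);              // Insert just before pos.
    }

The two DCHECKs in PutBefore perform the same bracketing test: the element before pos must compare less than k, and pos itself (if any) must compare greater.
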
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index b1180bd..38f1307 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -21,8 +21,6 @@
 
 #include <pthread.h>
 
-#include "cutils/atomic-inline.h"
-
 #include "base/casts.h"
 #include "base/mutex-inl.h"
 #include "gc/heap.h"
@@ -99,9 +97,12 @@
     DCHECK_EQ((old_state_and_flags.as_struct.flags & kCheckpointRequest), 0);
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags;
     new_state_and_flags.as_struct.state = new_state;
-    int status = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                       &tls32_.state_and_flags.as_int);
-    if (LIKELY(status == 0)) {
+
+    // CAS the value with relaxed ordering; ordering is provided by the lock release below.
+    bool done =
+        tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+                                                                        new_state_and_flags.as_int);
+    if (LIKELY(done)) {
       break;
     }
   }
@@ -141,9 +142,10 @@
       union StateAndFlags new_state_and_flags;
       new_state_and_flags.as_int = old_state_and_flags.as_int;
       new_state_and_flags.as_struct.state = kRunnable;
-      // CAS the value without a memory barrier, that occurred in the lock above.
-      done = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                &tls32_.state_and_flags.as_int) == 0;
+      // CAS the value with relaxed ordering; ordering is provided by the lock acquisition above.
+      done =
+          tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+                                                                          new_state_and_flags.as_int);
     }
     if (UNLIKELY(!done)) {
       // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d60fb49..7827dfb 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -34,8 +34,6 @@
 #include "base/mutex.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
-#include "cutils/atomic.h"
-#include "cutils/atomic-inline.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
@@ -591,14 +589,6 @@
 #endif
 }
 
-void Thread::AtomicSetFlag(ThreadFlag flag) {
-  android_atomic_or(flag, &tls32_.state_and_flags.as_int);
-}
-
-void Thread::AtomicClearFlag(ThreadFlag flag) {
-  android_atomic_and(-1 ^ flag, &tls32_.state_and_flags.as_int);
-}
-
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
 static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
   LOG(ERROR) << *thread << " suspend count already zero.";
@@ -704,9 +694,10 @@
   union StateAndFlags new_state_and_flags;
   new_state_and_flags.as_int = old_state_and_flags.as_int;
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
-  int succeeded = android_atomic_acquire_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                             &tls32_.state_and_flags.as_int);
-  if (UNLIKELY(succeeded != 0)) {
+  bool success =
+      tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(old_state_and_flags.as_int,
+                                                                                       new_state_and_flags.as_int);
+  if (UNLIKELY(!success)) {
     // The thread changed state before the checkpoint was installed.
     CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
     tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
@@ -714,7 +705,7 @@
     CHECK_EQ(ReadFlag(kCheckpointRequest), true);
     TriggerSuspend();
   }
-  return succeeded == 0;
+  return success;
 }
 
 void Thread::FullSuspendCheck() {
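
RequestCheckpoint above switches to a strong CAS, while the state-transition code in thread-inl.h keeps weak CAS inside retry loops. A short sketch of why the two flavors match those call sites (plain std::atomic; the flag values are assumptions):

    #include <atomic>

    std::atomic<int> state{0};

    // Inside a retry loop a weak CAS is fine: a spurious failure just costs one
    // more iteration, and old_val is refreshed with the current value each time.
    void SetBitsLoop(int bits) {
      int old_val = state.load(std::memory_order_relaxed);
      while (!state.compare_exchange_weak(old_val, old_val | bits)) {
      }
    }

    // For a one-shot attempt whose failure carries meaning (the thread changed
    // state before the checkpoint was installed), a strong CAS is needed so a
    // spurious failure is never mistaken for a real state change.
    bool TryInstallOnce(int expected, int desired) {
      return state.compare_exchange_strong(expected, desired);
    }
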
diff --git a/runtime/thread.h b/runtime/thread.h
index 7cd86de..4312741 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -24,6 +24,7 @@
 #include <memory>
 #include <string>
 
+#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "entrypoints/interpreter/interpreter_entrypoints.h"
@@ -738,9 +739,13 @@
     return (tls32_.state_and_flags.as_struct.flags != 0);
   }
 
-  void AtomicSetFlag(ThreadFlag flag);
+  void AtomicSetFlag(ThreadFlag flag) {
+    tls32_.state_and_flags.as_atomic_int.FetchAndOrSequentiallyConsistent(flag);
+  }
 
-  void AtomicClearFlag(ThreadFlag flag);
+  void AtomicClearFlag(ThreadFlag flag) {
+    tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
+  }
 
   void ResetQuickAllocEntryPointsForThread();
 
@@ -864,6 +869,7 @@
       // change to Runnable as a GC or other operation is in progress.
       volatile uint16_t state;
     } as_struct;
+    AtomicInteger as_atomic_int;
     volatile int32_t as_int;
 
    private:
@@ -871,6 +877,7 @@
     // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47409
     DISALLOW_COPY_AND_ASSIGN(StateAndFlags);
   };
+  COMPILE_ASSERT(sizeof(StateAndFlags) == sizeof(int32_t), weird_state_and_flags_size);
 
   static void ThreadExitCallback(void* arg);
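
AtomicSetFlag and AtomicClearFlag are now inline read-modify-write operations on the shared flags word. The equivalent pattern with std::atomic, for illustration only (the flag values are assumptions):

    #include <atomic>
    #include <cstdint>

    enum ThreadFlagBits : int32_t {
      kSuspendRequestBit = 1,
      kCheckpointRequestBit = 2,
    };

    std::atomic<int32_t> state_and_flags{0};

    // Set one flag bit (mirrors FetchAndOrSequentiallyConsistent).
    inline void AtomicSetFlag(int32_t flag) {
      state_and_flags.fetch_or(flag, std::memory_order_seq_cst);
    }

    // Clear one flag bit: AND with the complement of the flag. Note that the
    // patch writes the mask as -1 ^ flag, which is the same value as ~flag.
    inline void AtomicClearFlag(int32_t flag) {
      state_and_flags.fetch_and(~flag, std::memory_order_seq_cst);
    }
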
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index d20a459..54732fa 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -153,8 +153,8 @@
 
 #if HAVE_TIMED_RWLOCK
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
-static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
-static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) {
+static void UnsafeLogFatalForThreadSuspendAllTimeout() NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
+static void UnsafeLogFatalForThreadSuspendAllTimeout() {
   Runtime* runtime = Runtime::Current();
   std::ostringstream ss;
   ss << "Thread suspend timeout\n";
@@ -332,7 +332,7 @@
 #if HAVE_TIMED_RWLOCK
   // Timeout if we wait more than 30 seconds.
   if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
-    UnsafeLogFatalForThreadSuspendAllTimeout(self);
+    UnsafeLogFatalForThreadSuspendAllTimeout();
   }
 #else
   Locks::mutator_lock_->ExclusiveLock(self);
@@ -351,6 +351,7 @@
 
 void ThreadList::ResumeAll() {
   Thread* self = Thread::Current();
+  DCHECK(self != nullptr);
 
   VLOG(threads) << *self << " ResumeAll starting";
 
@@ -587,7 +588,7 @@
 #if HAVE_TIMED_RWLOCK
   // Timeout if we wait more than 30 seconds.
   if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
-    UnsafeLogFatalForThreadSuspendAllTimeout(self);
+    UnsafeLogFatalForThreadSuspendAllTimeout();
   } else {
     Locks::mutator_lock_->ExclusiveUnlock(self);
   }
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index 57bf4f1..0e47d21 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -38,6 +38,7 @@
   kWaitingForSignalCatcherOutput,   // WAITING        TS_WAIT      waiting for signal catcher IO to complete
   kWaitingInMainSignalCatcherLoop,  // WAITING        TS_WAIT      blocking/reading/processing signals
   kWaitingForDeoptimization,        // WAITING        TS_WAIT      waiting for deoptimization suspend all
+  kWaitingForMethodTracingStart,    // WAITING        TS_WAIT      waiting for method tracing to start
   kStarting,                        // NEW            TS_WAIT      native thread started, not yet ready to run managed code
   kNative,                          // RUNNABLE       TS_RUNNING   running in a JNI native method
   kSuspended,                       // RUNNABLE       TS_RUNNING   suspended by GC or debugger
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 032a566..1a450c4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -459,7 +459,7 @@
   }
 
   // Update current offset.
-  cur_offset_ = kTraceHeaderLength;
+  cur_offset_.StoreRelaxed(kTraceHeaderLength);
 }
 
 static void DumpBuf(uint8_t* buf, size_t buf_size, ProfilerClockSource clock_source)
@@ -480,7 +480,7 @@
   // Compute elapsed time.
   uint64_t elapsed = MicroTime() - start_time_;
 
-  size_t final_offset = cur_offset_;
+  size_t final_offset = cur_offset_.LoadRelaxed();
   uint32_t clock_overhead_ns = GetClockOverheadNanoSeconds(this);
 
   if ((flags_ & kTraceCountAllocs) != 0) {
@@ -623,13 +623,13 @@
   int32_t new_offset;
   int32_t old_offset;
   do {
-    old_offset = cur_offset_;
+    old_offset = cur_offset_.LoadRelaxed();
     new_offset = old_offset + GetRecordSize(clock_source_);
     if (new_offset > buffer_size_) {
       overflow_ = true;
       return;
     }
-  } while (android_atomic_release_cas(old_offset, new_offset, &cur_offset_) != 0);
+  } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
 
   TraceAction action = kTraceMethodEnter;
   switch (event) {
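
The trace.cc hunk above converts the record-slot reservation on cur_offset_ from android_atomic_release_cas to a weak compare-exchange on an AtomicInteger, retrying until the exchange succeeds. Below is a self-contained sketch of that reservation loop, using std::atomic as a stand-in; the names kBufferSize and ReserveSlot and the fixed record size are illustrative, not taken from the ART sources.

#include <atomic>
#include <cstdint>
#include <cstdio>

namespace sketch {

constexpr int32_t kBufferSize = 64;
std::atomic<int32_t> cur_offset{0};

// Reserve record_size bytes in the trace buffer, or return -1 on overflow.
// A weak CAS is sufficient because the loop simply retries on spurious failure,
// which is why the patch can use CompareExchangeWeakSequentiallyConsistent.
int32_t ReserveSlot(int32_t record_size) {
  int32_t old_offset;
  int32_t new_offset;
  do {
    old_offset = cur_offset.load(std::memory_order_relaxed);
    new_offset = old_offset + record_size;
    if (new_offset > kBufferSize) {
      return -1;  // Overflow: the caller would set overflow_ = true and bail out.
    }
  } while (!cur_offset.compare_exchange_weak(old_offset, new_offset,
                                             std::memory_order_seq_cst));
  return old_offset;  // Start of the reserved region.
}

}  // namespace sketch

int main() {
  for (int i = 0; i < 5; ++i) {
    std::printf("record %d starts at offset %d\n", i, sketch::ReserveSlot(20));
  }
  return 0;
}
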
diff --git a/runtime/trace.h b/runtime/trace.h
index 08da16f..9c8d35b 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -23,6 +23,7 @@
 #include <string>
 #include <vector>
 
+#include "atomic.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "instrumentation.h"
@@ -65,11 +66,14 @@
 
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
                     bool direct_to_ddms, bool sampling_enabled, int interval_us)
-  LOCKS_EXCLUDED(Locks::mutator_lock_,
-                 Locks::thread_list_lock_,
-                 Locks::thread_suspend_count_lock_,
-                 Locks::trace_lock_);
-  static void Stop() LOCKS_EXCLUDED(Locks::trace_lock_);
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_,
+                     Locks::trace_lock_);
+  static void Stop()
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::trace_lock_);
   static void Shutdown() LOCKS_EXCLUDED(Locks::trace_lock_);
   static TracingMode GetMethodTracingMode() LOCKS_EXCLUDED(Locks::trace_lock_);
 
@@ -163,7 +167,7 @@
   const uint64_t start_time_;
 
   // Offset into buf_.
-  volatile int32_t cur_offset_;
+  AtomicInteger cur_offset_;
 
   // Did we overflow the buffer recording traces?
   bool overflow_;
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 26e7d31..5a5805f 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -101,11 +101,6 @@
   }
 
   const struct sigaction& action = user_sigactions[sig].GetAction();
-
-  // Only deliver the signal if the signal was not masked out.
-  if (sigismember(&action.sa_mask, sig)) {
-     return;
-  }
   if ((action.sa_flags & SA_SIGINFO) == 0) {
     if (action.sa_handler != NULL) {
       action.sa_handler(sig);
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 3b11879..f412034 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -15,9 +15,11 @@
  */
 
 import junit.framework.Assert;
+import java.util.Arrays;
+import java.lang.reflect.Method;
 
 public class Main {
-  public static void main(String args[]) {
+  public static void main(String args[]) throws Exception {
     test_Double_doubleToRawLongBits();
     test_Double_longBitsToDouble();
     test_Float_floatToRawIntBits();
@@ -50,6 +52,18 @@
     test_String_isEmpty();
     test_String_length();
     test_Thread_currentThread();
+    initSupportMethodsForPeekPoke();
+    test_Memory_peekByte();
+    test_Memory_peekShort();
+    test_Memory_peekInt();
+    test_Memory_peekLong();
+    test_Memory_pokeByte();
+    test_Memory_pokeShort();
+    test_Memory_pokeInt();
+    test_Memory_pokeLong();
+    test_AtomicBoolean_compareAndSet();
+    test_AtomicInteger_compareAndSet();
+    test_AtomicLong_compareAndSet();
   }
 
   /*
@@ -82,6 +96,60 @@
     Assert.assertNotNull(Thread.currentThread());
   }
 
+  /**
+   * Will test inlining CAS, by inclusion of AtomicBoolean in core.oat.
+   */
+  public static void test_AtomicBoolean_compareAndSet() {
+    java.util.concurrent.atomic.AtomicBoolean ab = new java.util.concurrent.atomic.AtomicBoolean();
+    Assert.assertEquals(ab.compareAndSet(false, false), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, true), true);
+    Assert.assertEquals(ab.compareAndSet(false, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, false), true);
+  }
+
+  /**
+   * Will test inlining CAS, by inclusion of AtomicInteger in core.oat.
+   */
+  public static void test_AtomicInteger_compareAndSet() {
+    java.util.concurrent.atomic.AtomicInteger ab = new java.util.concurrent.atomic.AtomicInteger();
+    Assert.assertEquals(ab.compareAndSet(0, 0), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), true);
+    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0), true);
+  }
+
+  /**
+   * Will test inlining CAS, by inclusion of AtomicLong in core.oat.
+   */
+  public static void test_AtomicLong_compareAndSet() {
+    java.util.concurrent.atomic.AtomicLong ab = new java.util.concurrent.atomic.AtomicLong();
+    Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
+    Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), true);
+    Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), false);
+    Assert.assertEquals(ab.compareAndSet(0l, 0l), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
+    Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
+  }
+
   public static void test_String_length() {
     String str0 = "";
     String str1 = "x";
@@ -510,4 +578,131 @@
     Assert.assertEquals(Long.reverse(Long.MIN_VALUE), 1L);
   }
 
+  static Object runtime;
+  static Method address_of;
+  static Method peek_byte;
+  static Method peek_short;
+  static Method peek_int;
+  static Method peek_long;
+  static Method poke_byte;
+  static Method poke_short;
+  static Method poke_int;
+  static Method poke_long;
+
+  public static void initSupportMethodsForPeekPoke() throws Exception {
+    Class<?> vm_runtime = Class.forName("dalvik.system.VMRuntime");
+    Method get_runtime = vm_runtime.getDeclaredMethod("getRuntime");
+    runtime = get_runtime.invoke(null);
+    address_of = vm_runtime.getDeclaredMethod("addressOf", Object.class);
+
+    Class<?> io_memory = Class.forName("libcore.io.Memory");
+    peek_byte = io_memory.getDeclaredMethod("peekByte", Long.TYPE);
+    peek_int = io_memory.getDeclaredMethod("peekInt", Long.TYPE, Boolean.TYPE);
+    peek_short = io_memory.getDeclaredMethod("peekShort", Long.TYPE, Boolean.TYPE);
+    peek_long = io_memory.getDeclaredMethod("peekLong", Long.TYPE, Boolean.TYPE);
+    poke_byte = io_memory.getDeclaredMethod("pokeByte", Long.TYPE, Byte.TYPE);
+    poke_short = io_memory.getDeclaredMethod("pokeShort", Long.TYPE, Short.TYPE, Boolean.TYPE);
+    poke_int = io_memory.getDeclaredMethod("pokeInt", Long.TYPE, Integer.TYPE, Boolean.TYPE);
+    poke_long = io_memory.getDeclaredMethod("pokeLong", Long.TYPE, Long.TYPE, Boolean.TYPE);
+  }
+
+  public static void test_Memory_peekByte() throws Exception {
+    byte[] b = new byte [2];
+    b[0] = 0x12;
+    b[1] = 0x11;
+    long address = (long)address_of.invoke(runtime, b);
+    Assert.assertEquals((byte)peek_byte.invoke(null, address), 0x12);
+    Assert.assertEquals((byte)peek_byte.invoke(null, address + 1), 0x11);
+  }
+
+  public static void test_Memory_peekShort() throws Exception {
+    byte[] b = new byte [3];
+    b[0] = 0x13;
+    b[1] = 0x12;
+    b[2] = 0x11;
+    long address = (long)address_of.invoke(runtime, b);
+    Assert.assertEquals((short)peek_short.invoke(null, address, false), 0x1213);  // Aligned read
+    Assert.assertEquals((short)peek_short.invoke(null, address + 1, false), 0x1112);  // Unaligned read
+  }
+
+  public static void test_Memory_peekInt() throws Exception {
+    byte[] b = new byte [5];
+    b[0] = 0x15;
+    b[1] = 0x14;
+    b[2] = 0x13;
+    b[3] = 0x12;
+    b[4] = 0x11;
+    long address = (long)address_of.invoke(runtime, b);
+    Assert.assertEquals((int)peek_int.invoke(null, address, false), 0x12131415);
+    Assert.assertEquals((int)peek_int.invoke(null, address + 1, false), 0x11121314);
+  }
+
+  public static void test_Memory_peekLong() throws Exception {
+    byte[] b = new byte [9];
+    b[0] = 0x19;
+    b[1] = 0x18;
+    b[2] = 0x17;
+    b[3] = 0x16;
+    b[4] = 0x15;
+    b[5] = 0x14;
+    b[6] = 0x13;
+    b[7] = 0x12;
+    b[8] = 0x11;
+    long address = (long)address_of.invoke(runtime, b);
+    Assert.assertEquals((long)peek_long.invoke(null, address, false), 0x1213141516171819L);
+    Assert.assertEquals((long)peek_long.invoke(null, address + 1, false), 0x1112131415161718L);
+  }
+
+  public static void test_Memory_pokeByte() throws Exception {
+    byte[] r = {0x11, 0x12};
+    byte[] b = new byte [2];
+    long address = (long)address_of.invoke(runtime, b);
+    poke_byte.invoke(null, address, (byte)0x11);
+    poke_byte.invoke(null, address + 1, (byte)0x12);
+    Assert.assertTrue(Arrays.equals(r, b));
+  }
+
+  public static void test_Memory_pokeShort() throws Exception {
+    byte[] ra = {0x12, 0x11, 0x13};
+    byte[] ru = {0x12, 0x22, 0x21};
+    byte[] b = new byte [3];
+    long address = (long)address_of.invoke(runtime, b);
+
+    // Aligned write
+    b[2] = 0x13;
+    poke_short.invoke(null, address, (short)0x1112, false);
+    Assert.assertTrue(Arrays.equals(ra, b));
+
+    // Unaligned write
+    poke_short.invoke(null, address + 1, (short)0x2122, false);
+    Assert.assertTrue(Arrays.equals(ru, b));
+  }
+
+  public static void test_Memory_pokeInt() throws Exception {
+    byte[] ra = {0x14, 0x13, 0x12, 0x11, 0x15};
+    byte[] ru = {0x14, 0x24, 0x23, 0x22, 0x21};
+    byte[] b = new byte [5];
+    long address = (long)address_of.invoke(runtime, b);
+
+    // Aligned write
+    b[4] = 0x15;
+    poke_int.invoke(null, address, (int)0x11121314, false);
+    Assert.assertTrue(Arrays.equals(ra, b));
+
+    // Unaligned write
+    poke_int.invoke(null, address + 1, (int)0x21222324, false);
+    Assert.assertTrue(Arrays.equals(ru, b));
+  }
+
+  public static void test_Memory_pokeLong() throws Exception {
+    byte[] ra = {0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x19};
+    byte[] ru = {0x18, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21};
+    byte[] b = new byte [9];
+    long address = (long)address_of.invoke(runtime, b);
+
+    // Aligned write
+    b[8] = 0x19;
+    poke_long.invoke(null, address, (long)0x1112131415161718L, false);
+    Assert.assertTrue(Arrays.equals(ra, b));
+
+    // Unaligned write
+    poke_long.invoke(null, address + 1, (long)0x2122232425262728L, false);
+    Assert.assertTrue(Arrays.equals(ru, b));
+  }
 }
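
The new CAS tests above depend on the Java compareAndSet(expected, update) contract: the update is stored, and true is returned, only when the current value equals expected, with no spurious failures. Here is a tiny C++ analogue built on std::atomic, offered as an assumption about equivalent semantics rather than as the ART intrinsic implementation.

#include <atomic>
#include <cassert>
#include <cstdint>

// Mirrors AtomicInteger.compareAndSet(expected, update). `expected` is taken by
// value so the by-reference update that compare_exchange_strong performs on
// failure stays local, matching the Java signature.
static bool CompareAndSet(std::atomic<int32_t>& value, int32_t expected, int32_t update) {
  return value.compare_exchange_strong(expected, update, std::memory_order_seq_cst);
}

int main() {
  std::atomic<int32_t> ai{0};
  assert(CompareAndSet(ai, 0, 0));            // Current value is 0: succeeds, value stays 0.
  assert(!CompareAndSet(ai, 0x12345678, 0));  // Current value is 0, not 0x12345678: fails.
  assert(CompareAndSet(ai, 0, 0x12345678));   // Succeeds: value becomes 0x12345678.
  assert(ai.load() == 0x12345678);
  return 0;
}

These are the same first few transitions the AtomicInteger test walks through; the AtomicBoolean and AtomicLong tests exercise the identical contract for their value types.
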
diff --git a/test/083-compiler-regressions/expected.txt b/test/083-compiler-regressions/expected.txt
index 7576b02..10406c7 100644
--- a/test/083-compiler-regressions/expected.txt
+++ b/test/083-compiler-regressions/expected.txt
@@ -13,6 +13,7 @@
 1
 false
 b13679511Test finishing
+b16177324TestWrapper caught NPE as expected.
 largeFrame passes
 largeFrameFloat passes
 mulBy1Test passes
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 6a12ca9..0f7527c 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -35,6 +35,7 @@
         b2487514Test();
         b5884080Test();
         b13679511Test();
+        b16177324TestWrapper();
         largeFrameTest();
         largeFrameTestFloat();
         mulBy1Test();
@@ -908,6 +909,24 @@
        System.out.println("b13679511Test finishing");
     }
 
+    static void b16177324TestWrapper() {
+      try {
+        b16177324Test();
+      } catch (NullPointerException expected) {
+        System.out.println("b16177324TestWrapper caught NPE as expected.");
+      }
+    }
+
+    static void b16177324Test() {
+      // We need this to be a single BasicBlock. Putting it into a try block would cause it to
+      // be split at each insn that can throw. So we do the try-catch in a wrapper function.
+      int v1 = B16177324Values.values[0];        // Null-check on array element access.
+      int v2 = B16177324ValuesKiller.values[0];  // clinit<>() sets B16177324Values.values to null.
+      int v3 = B16177324Values.values[0];        // Should throw NPE.
+      // If the null-check for v3 was eliminated we should fail with SIGSEGV.
+      System.out.println("Unexpectedly retrieved all values: " + v1 + ", " + v2 + ", " + v3);
+    }
+
     static double TooManyArgs(
           long l00,
           long l01,
@@ -9743,3 +9762,14 @@
     }
   }
 }
+
+class B16177324Values {
+  public static int values[] = { 42 };
+}
+
+class B16177324ValuesKiller {
+  public static int values[] = { 1234 };
+  static {
+    B16177324Values.values = null;
+  }
+}
diff --git a/test/304-method-tracing/expected.txt b/test/304-method-tracing/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/304-method-tracing/expected.txt
diff --git a/test/304-method-tracing/info.txt b/test/304-method-tracing/info.txt
new file mode 100644
index 0000000..d3154e6
--- /dev/null
+++ b/test/304-method-tracing/info.txt
@@ -0,0 +1 @@
+Test method tracing from command-line.
diff --git a/test/304-method-tracing/run b/test/304-method-tracing/run
new file mode 100755
index 0000000..7bd1895
--- /dev/null
+++ b/test/304-method-tracing/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Runs the test with method tracing enabled.
+exec ${RUN} "$@" --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin
diff --git a/test/304-method-tracing/src/Main.java b/test/304-method-tracing/src/Main.java
new file mode 100644
index 0000000..25cee6d
--- /dev/null
+++ b/test/304-method-tracing/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+public class Main {
+    static class ThreadRunnable implements Runnable {
+        public void run() {
+            for (int i = 0; i < 1000; ++i) {
+                doNothing();
+            }
+        }
+
+        private void doNothing() {}
+    }
+
+    public static void main(String[] args) {
+        ArrayList<Thread> threads = new ArrayList<Thread>();
+        for (int i = 0; i < 10; ++i) {
+            threads.add(new Thread(new ThreadRunnable(), "TestThread-" + i));
+        }
+
+        for (Thread t : threads) {
+            t.start();
+        }
+
+        for (Thread t : threads) {
+            try {
+                t.join();
+            } catch (InterruptedException e) {
+                System.out.println("Thread " + t.getName() + " has been interrupted");
+            }
+        }
+    }
+}
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index fec2540..16300bb 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -193,7 +193,7 @@
 $(3): $$(ART_TEST_HOST_OAT_$(1)_DEX) $(ART_TEST_HOST_OAT_DEPENDENCIES)
 	$(hide) mkdir -p $(ART_HOST_TEST_DIR)/android-data-$$@/dalvik-cache/$$($(2)HOST_ARCH)
 	$(hide) cp $$(realpath $$<) $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar
-	$(hide) $(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) $(4) \
+	$(hide) $(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) $(4) \
 	  --boot-image=$$(HOST_CORE_IMG_LOCATION) \
 	  --dex-file=$$(PRIVATE_DEX_FILE) --oat-file=$$(PRIVATE_OAT_FILE) \
 	  --instruction-set=$($(2)ART_HOST_ARCH) --host --android-root=$(HOST_OUT) \
diff --git a/test/run-all-tests b/test/run-all-tests
index 885ee44..25d5c5f 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -80,6 +80,9 @@
     elif [ "x$1" = "x--64" ]; then
         run_args="${run_args} --64"
         shift
+    elif [ "x$1" = "x--trace" ]; then
+        run_args="${run_args} --trace"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
diff --git a/test/run-test b/test/run-test
index d1c5bb2..2989f25 100755
--- a/test/run-test
+++ b/test/run-test
@@ -64,7 +64,6 @@
 target_mode="yes"
 dev_mode="no"
 update_mode="no"
-debug_mode="no"
 runtime="art"
 usage="no"
 build_only="no"
@@ -162,6 +161,9 @@
         run_args="${run_args} --64"
         suffix64="64"
         shift
+    elif [ "x$1" = "x--trace" ]; then
+        run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -257,6 +259,7 @@
         echo "    --output-path [path] Location where to store the build" \
              "files."
         echo "    --64                 Run the test in 64-bit mode"
+        echo "    --trace              Run with method tracing"
     ) 1>&2
     exit 1
 fi