Merge "ART: Avoid recursive abort"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index eb9ad47..84176a1 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -180,7 +180,8 @@
   driver/compiler_options.h \
   image_writer.h \
   optimizing/locations.h \
-  utils/arm/constants_arm.h
+  utils/arm/constants_arm.h \
+  utils/dex_instruction_utils.h
 
 # $(1): target or host
 # $(2): ndebug or debug
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 97387a1..a3d9a0b 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -60,14 +60,18 @@
       const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
       uint32_t mapping_table_offset = mapping_table.empty() ? 0u
           : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size();
-      OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
+      const std::vector<uint8_t>& gc_map = *compiled_method->GetGcMap();
+      uint32_t gc_map_offset = gc_map.empty() ? 0u
+          : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size() + gc_map.size();
+      OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
                                          compiled_method->GetFrameSizeInBytes(),
                                          compiled_method->GetCoreSpillMask(),
                                          compiled_method->GetFpSpillMask(), code_size);
 
       header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
       std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-      size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table.size();
+      size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table.size() +
+          gc_map.size();
       size_t code_offset = compiled_method->AlignCode(size - code_size);
       size_t padding = code_offset - (size - code_size);
       chunk->reserve(padding + size);
@@ -75,6 +79,7 @@
       memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
       chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
       chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end());
+      chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end());
       chunk->insert(chunk->begin(), padding, 0);
       chunk->insert(chunk->end(), code->begin(), code->end());
       CHECK_EQ(padding + size, chunk->size());
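
Tracing the inserts above: the memcpy places the OatQuickMethodHeader at the front of the chunk, and each subsequent insert at chunk->begin() pushes the previously added data toward the code, so the finished chunk reads, from low to high address,

    [padding][gc map][mapping table][vmap table][OatQuickMethodHeader][code]

which is consistent with the offsets computed earlier: gc_map_offset, like the mapping table and vmap table offsets, is the distance measured backwards from the start of the code.
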
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 20b750c..9cffbc8 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -42,7 +42,7 @@
   ~CommonCompilerTest();
 
   // Create an OatMethod based on pointers (for unit tests).
-  OatFile::OatMethod CreateOatMethod(const void* code, const uint8_t* gc_map);
+  OatFile::OatMethod CreateOatMethod(const void* code);
 
   void MakeExecutable(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index b56fd6f..4d377df 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -316,9 +316,8 @@
 
 enum MIROptimizationFlagPositions {
   kMIRIgnoreNullCheck = 0,
-  kMIRNullCheckOnly,
   kMIRIgnoreRangeCheck,
-  kMIRRangeCheckOnly,
+  kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
   kMIRClassIsInitialized,
   kMIRClassIsInDexCache,
   kMirIgnoreDivZeroCheck,
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index d311bc7..578952b 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -15,7 +15,6 @@
  */
 
 #include "global_value_numbering.h"
-
 #include "local_value_numbering.h"
 
 namespace art {
@@ -31,8 +30,6 @@
       modifications_allowed_(true),
       mode_(mode),
       global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
-      field_index_map_(FieldReferenceComparator(), allocator->Adapter()),
-      field_index_reverse_map_(allocator->Adapter()),
       array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
       array_location_reverse_map_(allocator->Adapter()),
       ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
@@ -107,15 +104,8 @@
     if (bb->catch_entry) {
       merge_type = LocalValueNumbering::kCatchMerge;
     } else if (bb->last_mir_insn != nullptr &&
-        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_VOID ||
-         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
-         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT ||
-         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) &&
-        (bb->first_mir_insn == bb->last_mir_insn ||
-         (static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi &&
-          (bb->first_mir_insn->next == bb->last_mir_insn ||
-           (static_cast<int>(bb->first_mir_insn->next->dalvikInsn.opcode) == kMirOpPhi &&
-            bb->first_mir_insn->next->next == bb->last_mir_insn))))) {
+        IsInstructionReturn(bb->last_mir_insn->dalvikInsn.opcode) &&
+        bb->GetFirstNonPhiInsn() == bb->last_mir_insn) {
       merge_type = LocalValueNumbering::kReturnMerge;
     }
     // At least one predecessor must have been processed before this bb.
@@ -145,19 +135,6 @@
   return change;
 }
 
-uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) {
-  FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex(), type };
-  auto lb = field_index_map_.lower_bound(key);
-  if (lb != field_index_map_.end() && !field_index_map_.key_comp()(key, lb->first)) {
-    return lb->second;
-  }
-  DCHECK_LT(field_index_map_.size(), kNoValue);
-  uint16_t id = field_index_map_.size();
-  auto it = field_index_map_.PutBefore(lb, key, id);
-  field_index_reverse_map_.push_back(&*it);
-  return id;
-}
-
 uint16_t GlobalValueNumbering::GetArrayLocation(uint16_t base, uint16_t index) {
   auto cmp = array_location_map_.key_comp();
   ArrayLocation key = { base, index };
@@ -207,4 +184,20 @@
   return true;
 }
 
+bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors(
+    const ScopedArenaVector<uint16_t>& merge_names) const {
+  // Implicit parameters:
+  //   - *work_lvn: the LVN for which we're checking predecessors.
+  //   - merge_lvns_: the predecessor LVNs.
+  DCHECK_EQ(merge_lvns_.size(), merge_names.size());
+  for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
+    const LocalValueNumbering* pred_lvn = merge_lvns_[i];
+    uint16_t value_name = merge_names[i];
+    if (!pred_lvn->IsValueDivZeroChecked(value_name)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace art
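
The rewritten return-merge test above replaces a hand-unrolled check, which tolerated at most two leading Phi nodes before the return, with IsInstructionReturn() and BasicBlock::GetFirstNonPhiInsn(), which handles any number of leading Phis. A minimal sketch of what such a helper does, assuming the usual MIR list layout (the real implementation lives with the other BasicBlock methods; the body below is illustrative):

    // Sketch: skip leading kMirOpPhi instructions; returns nullptr if the
    // block is empty or contains only Phis.
    MIR* BasicBlock::GetFirstNonPhiInsn() {
      MIR* mir = first_mir_insn;
      while (mir != nullptr && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
        mir = mir->next;
      }
      return mir;
    }

With that helper, GetFirstNonPhiInsn() == last_mir_insn expresses exactly the old condition: the return is the only non-Phi instruction in the block.
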
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index 72d1112..d72144a 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -39,6 +39,12 @@
         cu->mir_graph->GetMaxNestedLoops() > kMaxAllowedNestedLoops;
   }
 
+  // Instance and static field id maps are held by MIRGraph to avoid recalculating them
+  // for every LVN pass.
+  template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
+  static uint16_t* PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
+                                      const Container& field_infos);
+
   GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode);
   ~GlobalValueNumbering();
 
@@ -114,34 +120,24 @@
     return (it != global_value_map_.end() && it->second == value);
   }
 
-  // FieldReference represents a unique resolved field.
-  struct FieldReference {
-    const DexFile* dex_file;
-    uint16_t field_idx;
-    uint16_t type;  // See comments for LocalValueNumbering::kFieldTypeCount.
-  };
+  // Get an instance field id.
+  uint16_t GetIFieldId(MIR* mir) {
+    return GetMirGraph()->GetGvnIFieldId(mir);
+  }
 
-  struct FieldReferenceComparator {
-    bool operator()(const FieldReference& lhs, const FieldReference& rhs) const {
-      if (lhs.field_idx != rhs.field_idx) {
-        return lhs.field_idx < rhs.field_idx;
-      }
-      // If the field_idx and dex_file match, the type must also match.
-      DCHECK(lhs.dex_file != rhs.dex_file || lhs.type == rhs.type);
-      return lhs.dex_file < rhs.dex_file;
-    }
-  };
+  // Get a static field id.
+  uint16_t GetSFieldId(MIR* mir) {
+    return GetMirGraph()->GetGvnSFieldId(mir);
+  }
 
-  // Maps field key to field id for resolved fields.
-  typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap;
+  // Get an instance field type based on field id.
+  uint16_t GetIFieldType(uint16_t field_id) {
+    return static_cast<uint16_t>(GetMirGraph()->GetIFieldLoweringInfo(field_id).MemAccessType());
+  }
 
-  // Get a field id.
-  uint16_t GetFieldId(const MirFieldInfo& field_info, uint16_t type);
-
-  // Get a field type based on field id.
-  uint16_t GetFieldType(uint16_t field_id) {
-    DCHECK_LT(field_id, field_index_reverse_map_.size());
-    return field_index_reverse_map_[field_id]->first.type;
+  // Get a static field type based on field id.
+  uint16_t GetSFieldType(uint16_t field_id) {
+    return static_cast<uint16_t>(GetMirGraph()->GetSFieldLoweringInfo(field_id).MemAccessType());
   }
 
   struct ArrayLocation {
@@ -199,6 +195,8 @@
 
   bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
 
+  bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
+
   CompilationUnit* GetCompilationUnit() const {
     return cu_;
   }
@@ -239,8 +237,6 @@
   Mode mode_;
 
   ValueMap global_value_map_;
-  FieldIndexMap field_index_map_;
-  ScopedArenaVector<const FieldIndexMap::value_type*> field_index_reverse_map_;
   ArrayLocationMap array_location_map_;
   ScopedArenaVector<const ArrayLocationMap::value_type*> array_location_reverse_map_;
   RefSetIdMap ref_set_map_;
@@ -268,6 +264,32 @@
   return last_value_;
 }
 
+template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
+uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
+                                                   const Container& field_infos) {
+  size_t size = field_infos.size();
+  uint16_t* field_ids = reinterpret_cast<uint16_t*>(allocator->Alloc(size * sizeof(uint16_t),
+                                                                     kArenaAllocMisc));
+  for (size_t i = 0u; i != size; ++i) {
+    size_t idx = i;
+    const MirFieldInfo& cur_info = field_infos[i];
+    if (cur_info.IsResolved()) {
+      for (size_t j = 0; j != i; ++j) {
+        const MirFieldInfo& prev_info = field_infos[j];
+        if (prev_info.IsResolved() &&
+            prev_info.DeclaringDexFile() == cur_info.DeclaringDexFile() &&
+            prev_info.DeclaringFieldIndex() == cur_info.DeclaringFieldIndex()) {
+          DCHECK_EQ(cur_info.MemAccessType(), prev_info.MemAccessType());
+          idx = j;
+          break;
+        }
+      }
+    }
+    field_ids[i] = idx;
+  }
+  return field_ids;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
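
PrepareGvnFieldIds above replaces the per-GVN field_index_map_: each field info maps to the index of the first info with the same declaring dex file and declaring field index, so MIRs that reach the same physical field through different field_idx values share one GVN field id, while unresolved infos always keep their own index. A self-contained model of the mapping (FieldInfo is an illustrative stand-in for MirFieldInfo, not the real class):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct FieldInfo {                 // Illustrative stand-in for MirFieldInfo.
      const void* declaring_dex_file;  // nullptr => unresolved.
      uint16_t declaring_field_idx;
    };

    // The same quadratic first-occurrence mapping as PrepareGvnFieldIds.
    std::vector<uint16_t> PrepareFieldIds(const std::vector<FieldInfo>& infos) {
      std::vector<uint16_t> ids(infos.size());
      for (size_t i = 0; i != infos.size(); ++i) {
        size_t idx = i;
        if (infos[i].declaring_dex_file != nullptr) {  // Only resolved fields deduplicate.
          for (size_t j = 0; j != i; ++j) {
            if (infos[j].declaring_dex_file == infos[i].declaring_dex_file &&
                infos[j].declaring_field_idx == infos[i].declaring_field_idx) {
              idx = j;  // Reuse the id of the first occurrence.
              break;
            }
          }
        }
        ids[i] = static_cast<uint16_t>(idx);
      }
      return ids;
    }

The quadratic scan is acceptable because the per-method field info tables are small, and computing the ids once in MIRGraph avoids rebuilding the old map for every LVN/GVN run.
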
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index 35d5b99..7e3b4d8 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -17,6 +17,7 @@
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/mir_field_info.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "gtest/gtest.h"
@@ -32,6 +33,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct SFieldDef {
@@ -39,6 +41,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct BBDef {
@@ -131,18 +134,19 @@
     { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
 #define DEF_PHI2(bb, reg, src1, src2) \
     { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
+#define DEF_DIV_REM(bb, opcode, result, dividend, divisor) \
+    { bb, opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = 0u |  // Without kFlagIsStatic.
-            (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
     }
@@ -158,15 +162,14 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       // Mark even unresolved fields as initialized.
-      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
-          MirSFieldLoweringInfo::kFlagClassIsInitialized;
+      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
       // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN.
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
     }
@@ -238,12 +241,16 @@
       mir->dalvikInsn.opcode = def->opcode;
       mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
       mir->dalvikInsn.vB_wide = def->value;
-      if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_info;
-      } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
         mir->meta.sfield_lowering_info = def->field_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
       } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
         mir->meta.phi_incoming = static_cast<BasicBlockId*>(
             allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo));
@@ -288,6 +295,10 @@
     cu_.mir_graph->ComputeDominators();
     cu_.mir_graph->ComputeTopologicalSortOrder();
     cu_.mir_graph->SSATransformationEnd();
+    cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
+    cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
     ASSERT_TRUE(gvn_ == nullptr);
     gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
                                                            GlobalValueNumbering::kModeGvn));
@@ -498,18 +509,18 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, NonAliasingIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 0u, 0u, false },  // Unresolved, Int.
-      { 9u, 1u, 9u, false },  // Int.
-      { 10u, 1u, 10u, false },  // Int.
-      { 11u, 1u, 11u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
+      { 9u, 1u, 9u, false, kDexMemAccessWord },
+      { 10u, 1u, 10u, false, kDexMemAccessWord },
+      { 11u, 1u, 11u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -604,15 +615,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsSingleObject) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },  // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -671,15 +682,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsTwoObjects) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -740,15 +751,15 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Short.
-      { 5u, 1u, 5u, false },  // Char.
-      { 6u, 0u, 0u, false },  // Unresolved, Short.
-      { 7u, 1u, 7u, false },  // Int.
-      { 8u, 1u, 8u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessShort },
+      { 5u, 1u, 5u, false, kDexMemAccessChar },
+      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1078,18 +1089,18 @@
 
 TEST_F(GlobalValueNumberingTestLoop, NonAliasingIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Int.
-      { 5u, 1u, 5u, false },  // Short.
-      { 6u, 1u, 6u, false },  // Char.
-      { 7u, 0u, 0u, false },  // Unresolved, Short.
-      { 8u, 1u, 8u, false },  // Int.
-      { 9u, 0u, 0u, false },  // Unresolved, Int.
-      { 10u, 1u, 10u, false },  // Int.
-      { 11u, 1u, 11u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessWord },
+      { 5u, 1u, 5u, false, kDexMemAccessShort },
+      { 6u, 1u, 6u, false, kDexMemAccessChar },
+      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 8u, 1u, 8u, false, kDexMemAccessWord },
+      { 9u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
+      { 10u, 1u, 10u, false, kDexMemAccessWord },
+      { 11u, 1u, 11u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1201,14 +1212,14 @@
 
 TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsSingleObject) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Int.
-      { 4u, 1u, 4u, false },  // Int.
-      { 5u, 1u, 5u, false },  // Short.
-      { 6u, 1u, 6u, false },  // Char.
-      { 7u, 0u, 0u, false },  // Unresolved, Short.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
+      { 4u, 1u, 4u, false, kDexMemAccessWord },
+      { 5u, 1u, 5u, false, kDexMemAccessShort },
+      { 6u, 1u, 6u, false, kDexMemAccessChar },
+      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1272,14 +1283,14 @@
 
 TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsTwoObjects) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
-      { 3u, 1u, 3u, false },  // Short.
-      { 4u, 1u, 4u, false },  // Char.
-      { 5u, 0u, 0u, false },  // Unresolved, Short.
-      { 6u, 1u, 6u, false },  // Int.
-      { 7u, 1u, 7u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
+      { 3u, 1u, 3u, false, kDexMemAccessShort },
+      { 4u, 1u, 4u, false, kDexMemAccessChar },
+      { 5u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
+      { 6u, 1u, 6u, false, kDexMemAccessWord },
+      { 7u, 1u, 7u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1341,7 +1352,7 @@
 
 TEST_F(GlobalValueNumberingTestLoop, IFieldToBaseDependency) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // For the IGET that loads sreg 3u using base 2u, the following IPUT creates a dependency
@@ -1366,9 +1377,9 @@
 
 TEST_F(GlobalValueNumberingTestLoop, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
-      { 2u, 1u, 2u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -1562,8 +1573,8 @@
 
 TEST_F(GlobalValueNumberingTestCatch, IFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 1u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
@@ -1608,8 +1619,8 @@
 
 TEST_F(GlobalValueNumberingTestCatch, SFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 1u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET(3, Instruction::SGET, 0u, 0u),
@@ -1731,8 +1742,8 @@
 
 TEST_F(GlobalValueNumberingTest, NullCheckIFields) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Object.
-      { 1u, 1u, 1u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },  // Object.
+      { 1u, 1u, 1u, false, kDexMemAccessObject },  // Object.
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -1780,8 +1791,8 @@
 
 TEST_F(GlobalValueNumberingTest, NullCheckSFields) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Object.
-      { 1u, 1u, 1u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
+      { 1u, 1u, 1u, false, kDexMemAccessObject },
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -1907,12 +1918,12 @@
 
 TEST_F(GlobalValueNumberingTestDiamond, MergeSameValueInDifferentMemoryLocations) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
-      { 1u, 1u, 1u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessWord },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
@@ -1977,7 +1988,7 @@
   // LVN's aliasing_array_value_map_'s load_value_map for BBs #9, #4, #5, #7 because of the
   // DFS ordering of LVN evaluation.
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Object.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -2015,7 +2026,7 @@
 
 TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2052,10 +2063,10 @@
 
 TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2143,7 +2154,7 @@
 
 TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false },  // Int.
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
@@ -2213,4 +2224,45 @@
   PerformGVN();
 }
 
+TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) {
+  static const MIRDef mirs[] = {
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 1u, 20u, 21u),
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 2u, 24u, 21u),
+      DEF_DIV_REM(3u, Instruction::DIV_INT, 3u, 20u, 23u),
+      DEF_DIV_REM(4u, Instruction::DIV_INT, 4u, 24u, 22u),
+      DEF_DIV_REM(4u, Instruction::DIV_INT, 9u, 24u, 25u),
+      DEF_DIV_REM(5u, Instruction::DIV_INT, 5u, 24u, 21u),
+      DEF_DIV_REM(5u, Instruction::DIV_INT, 10u, 24u, 26u),
+      DEF_PHI2(6u, 27u, 25u, 26u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 12u, 20u, 27u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 6u, 24u, 21u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 7u, 20u, 23u),
+      DEF_DIV_REM(6u, Instruction::DIV_INT, 8u, 20u, 22u),
+  };
+
+  static const bool expected_ignore_div_zero_check[] = {
+      false,  // New divisor seen.
+      true,   // Eliminated: the divisor was already checked by the first DIV.
+      false,  // New divisor seen.
+      false,  // New divisor seen.
+      false,  // New divisor seen.
+      true,   // Eliminated in dominating block.
+      false,  // New divisor seen.
+      false,  // Phi node.
+      true,   // Eliminated on both sides of diamond and merged via phi.
+      true,   // Eliminated in dominating block.
+      true,   // Eliminated in dominating block.
+      false,  // Only eliminated on one path of diamond.
+  };
+
+  PrepareMIRs(mirs);
+  PerformGVN();
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_div_zero_check), mir_count_);
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
 }  // namespace art
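
For DivZeroCheckDiamond above, the test fixture's diamond CFG is (block ids as used in the MIR defs):

        [3]
       /   \
     [4]   [5]
       \   /
        [6]

and the expectations follow from three observations. A divisor already checked in the same block or in a dominating block is redundant (sreg 21u, checked in block 3, covers the repeats in blocks 3, 5 and 6; 23u likewise covers block 6). Divisors 25u (block 4) and 26u (block 5) are each checked on only one path, but their Phi 27u is therefore checked on both incoming edges, so the DIV by 27u in block 6 is eliminated. Divisor 22u is checked only in block 4, one side of the diamond, so the final DIV in block 6 must keep its check.
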
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index c1ce2ac..114346d 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -56,7 +56,7 @@
  public:
   static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                      uint16_t field_id) {
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     return gvn->LookupValue(kAliasingIFieldStartVersionOp, field_id,
                             lvn->global_memory_version_, lvn->unresolved_ifield_version_[type]);
   }
@@ -75,7 +75,7 @@
   static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                    uint16_t field_id, uint16_t base) {
     // If the base/field_id is non-aliasing in lvn, use the non-aliasing value.
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     if (lvn->IsNonAliasingIField(base, field_id, type)) {
       uint16_t loc = gvn->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
       auto lb = lvn->non_aliasing_ifield_value_map_.find(loc);
@@ -89,7 +89,7 @@
 
   static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
                                 uint16_t field_id) {
-    uint16_t type = gvn->GetFieldType(field_id);
+    uint16_t type = gvn->GetIFieldType(field_id);
     return lvn->unresolved_ifield_version_[type] == lvn->merge_new_memory_version_ ||
         lvn->global_memory_version_ == lvn->merge_new_memory_version_;
   }
@@ -339,11 +339,12 @@
       escaped_array_clobber_set_(EscapedArrayClobberKeyComparator(), allocator->Adapter()),
       range_checked_(RangeCheckKeyComparator() , allocator->Adapter()),
       null_checked_(std::less<uint16_t>(), allocator->Adapter()),
+      div_zero_checked_(std::less<uint16_t>(), allocator->Adapter()),
       merge_names_(allocator->Adapter()),
       merge_map_(std::less<ScopedArenaVector<BasicBlockId>>(), allocator->Adapter()),
       merge_new_memory_version_(kNoValue) {
-  std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u);
-  std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u);
+  std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_), 0u);
+  std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_), 0u);
 }
 
 bool LocalValueNumbering::Equals(const LocalValueNumbering& other) const {
@@ -362,7 +363,8 @@
       escaped_ifield_clobber_set_ == other.escaped_ifield_clobber_set_ &&
       escaped_array_clobber_set_ == other.escaped_array_clobber_set_ &&
       range_checked_ == other.range_checked_ &&
-      null_checked_ == other.null_checked_;
+      null_checked_ == other.null_checked_ &&
+      div_zero_checked_ == other.div_zero_checked_;
 }
 
 void LocalValueNumbering::MergeOne(const LocalValueNumbering& other, MergeType merge_type) {
@@ -379,6 +381,7 @@
   non_aliasing_refs_ = other.non_aliasing_refs_;
   range_checked_ = other.range_checked_;
   null_checked_ = other.null_checked_;
+  div_zero_checked_ = other.div_zero_checked_;
 
   const BasicBlock* pred_bb = gvn_->GetBasicBlock(other.Id());
   if (GlobalValueNumbering::HasNullCheckLastInsn(pred_bb, Id())) {
@@ -389,16 +392,20 @@
   if (merge_type == kCatchMerge) {
     // Memory is clobbered. Use new memory version and don't merge aliasing locations.
     global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
-    std::fill_n(unresolved_sfield_version_, kFieldTypeCount, global_memory_version_);
-    std::fill_n(unresolved_ifield_version_, kFieldTypeCount, global_memory_version_);
+    std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
+                global_memory_version_);
+    std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
+                global_memory_version_);
     PruneNonAliasingRefsForCatch();
     return;
   }
 
   DCHECK(merge_type == kNormalMerge);
   global_memory_version_ = other.global_memory_version_;
-  std::copy_n(other.unresolved_ifield_version_, kFieldTypeCount, unresolved_ifield_version_);
-  std::copy_n(other.unresolved_sfield_version_, kFieldTypeCount, unresolved_sfield_version_);
+  std::copy_n(other.unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
+              unresolved_ifield_version_);
+  std::copy_n(other.unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
+              unresolved_sfield_version_);
   sfield_value_map_ = other.sfield_value_map_;
   CopyAliasingValuesMap(&aliasing_ifield_value_map_, other.aliasing_ifield_value_map_);
   CopyAliasingValuesMap(&aliasing_array_value_map_, other.aliasing_array_value_map_);
@@ -410,9 +417,11 @@
 bool LocalValueNumbering::SameMemoryVersion(const LocalValueNumbering& other) const {
   return
       global_memory_version_ == other.global_memory_version_ &&
-      std::equal(unresolved_ifield_version_, unresolved_ifield_version_ + kFieldTypeCount,
+      std::equal(unresolved_ifield_version_,
+                 unresolved_ifield_version_ + arraysize(unresolved_ifield_version_),
                  other.unresolved_ifield_version_) &&
-      std::equal(unresolved_sfield_version_, unresolved_sfield_version_ + kFieldTypeCount,
+      std::equal(unresolved_sfield_version_,
+                 unresolved_sfield_version_ + arraysize(unresolved_sfield_version_),
                  other.unresolved_sfield_version_);
 }
 
@@ -439,18 +448,22 @@
   }
   if (new_global_version) {
     global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
-    std::fill_n(unresolved_sfield_version_, kFieldTypeCount, merge_new_memory_version_);
-    std::fill_n(unresolved_ifield_version_, kFieldTypeCount, merge_new_memory_version_);
+    std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
+                merge_new_memory_version_);
+    std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
+                merge_new_memory_version_);
   } else {
     // Initialize with a copy of memory versions from the comparison LVN.
     global_memory_version_ = cmp->global_memory_version_;
-    std::copy_n(cmp->unresolved_ifield_version_, kFieldTypeCount, unresolved_ifield_version_);
-    std::copy_n(cmp->unresolved_sfield_version_, kFieldTypeCount, unresolved_sfield_version_);
+    std::copy_n(cmp->unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
+                unresolved_ifield_version_);
+    std::copy_n(cmp->unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
+                unresolved_sfield_version_);
     for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
       if (lvn == cmp) {
         continue;
       }
-      for (size_t i = 0; i != kFieldTypeCount; ++i) {
+      for (size_t i = 0; i != kDexMemAccessTypeCount; ++i) {
         if (lvn->unresolved_ifield_version_[i] != cmp->unresolved_ifield_version_[i]) {
           unresolved_ifield_version_[i] = NewMemoryVersion(&merge_new_memory_version_);
         }
@@ -699,6 +712,28 @@
   }
 }
 
+void LocalValueNumbering::MergeDivZeroChecked() {
+  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+
+  // Find the LVN with the fewest div_zero_checked_ entries.
+  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
+  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
+    if (lvn->div_zero_checked_.size() < least_entries_lvn->div_zero_checked_.size()) {
+      least_entries_lvn = lvn;
+    }
+  }
+
+  // For each value name in that smallest set, check whether it is div-zero checked in all LVNs.
+  for (const auto& value_name : least_entries_lvn->div_zero_checked_) {
+    // Merge div_zero_checked_ for this value name.
+    merge_names_.clear();
+    merge_names_.resize(gvn_->merge_lvns_.size(), value_name);
+    if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
+      div_zero_checked_.insert(div_zero_checked_.end(), value_name);
+    }
+  }
+}
+
 void LocalValueNumbering::MergeSFieldValues(const SFieldToValueMap::value_type& entry,
                                             SFieldToValueMap::iterator hint) {
   uint16_t field_id = entry.first;
@@ -711,7 +746,7 @@
     if (it != lvn->sfield_value_map_.end()) {
       value_name = it->second;
     } else {
-      uint16_t type = gvn_->GetFieldType(field_id);
+      uint16_t type = gvn_->GetSFieldType(field_id);
       value_name = gvn_->LookupValue(kResolvedSFieldOp, field_id,
                                      lvn->unresolved_sfield_version_[type],
                                      lvn->global_memory_version_);
@@ -931,6 +966,9 @@
   // Merge null_checked_. We may later insert more, such as merged object field values.
   MergeNullChecked();
 
+  // Now merge the div_zero_checked_.
+  MergeDivZeroChecked();
+
   if (merge_type == kCatchMerge) {
     // Memory is clobbered. New memory version already created, don't merge aliasing locations.
     return;
@@ -1054,10 +1092,30 @@
   }
 }
 
+void LocalValueNumbering::HandleDivZeroCheck(MIR* mir, uint16_t reg) {
+  auto lb = div_zero_checked_.lower_bound(reg);
+  if (lb != div_zero_checked_.end() && *lb == reg) {
+    if (LIKELY(gvn_->CanModify())) {
+      if (gvn_->GetCompilationUnit()->verbose) {
+        LOG(INFO) << "Removing div zero check for 0x" << std::hex << mir->offset;
+      }
+      mir->optimization_flags |= MIR_IGNORE_DIV_ZERO_CHECK;
+    }
+  } else {
+    div_zero_checked_.insert(lb, reg);
+  }
+}
+
 void LocalValueNumbering::HandlePutObject(MIR* mir) {
   // If we're storing a non-aliasing reference, stop tracking it as non-aliasing now.
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]);
   HandleEscapingRef(base);
+  if (gvn_->CanModify() && null_checked_.count(base) != 0u) {
+    if (gvn_->GetCompilationUnit()->verbose) {
+      LOG(INFO) << "Removing GC card mark value null check for 0x" << std::hex << mir->offset;
+    }
+    mir->optimization_flags |= MIR_STORE_NON_NULL_VALUE;
+  }
 }
 
 void LocalValueNumbering::HandleEscapingRef(uint16_t base) {
@@ -1139,6 +1197,9 @@
       if (!wide && gvn_->NullCheckedInAllPredecessors(merge_names_)) {
         null_checked_.insert(value_name);
       }
+      if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
+        div_zero_checked_.insert(value_name);
+      }
     }
   }
   if (wide) {
@@ -1150,12 +1211,11 @@
 }
 
 uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) {
-  // uint16_t type = opcode - Instruction::AGET;
   uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]);
   HandleNullCheck(mir, array);
   uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]);
   HandleRangeCheck(mir, array, index);
-  uint16_t type = opcode - Instruction::AGET;
+  uint16_t type = AGetMemAccessType(static_cast<Instruction::Code>(opcode));
   // Establish value number for loaded register.
   uint16_t res;
   if (IsNonAliasingArray(array, type)) {
@@ -1182,7 +1242,7 @@
   uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]);
   HandleRangeCheck(mir, array, index);
 
-  uint16_t type = opcode - Instruction::APUT;
+  uint16_t type = APutMemAccessType(static_cast<Instruction::Code>(opcode));
   uint16_t value = (opcode == Instruction::APUT_WIDE)
                    ? GetOperandValueWide(mir->ssa_rep->uses[0])
                    : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1224,8 +1284,8 @@
     // Use result s_reg - will be unique.
     res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
-    uint16_t type = opcode - Instruction::IGET;
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t type = IGetMemAccessType(static_cast<Instruction::Code>(opcode));
+    uint16_t field_id = gvn_->GetIFieldId(mir);
     if (IsNonAliasingIField(base, field_id, type)) {
       uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
       auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
@@ -1249,10 +1309,10 @@
 }
 
 void LocalValueNumbering::HandleIPut(MIR* mir, uint16_t opcode) {
-  uint16_t type = opcode - Instruction::IPUT;
   int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1;
   uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]);
   HandleNullCheck(mir, base);
+  uint16_t type = IPutMemAccessType(static_cast<Instruction::Code>(opcode));
   const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
@@ -1272,7 +1332,7 @@
     // Aliasing fields of the same type may have been overwritten.
     auto it = aliasing_ifield_value_map_.begin(), end = aliasing_ifield_value_map_.end();
     while (it != end) {
-      if (gvn_->GetFieldType(it->first) != type) {
+      if (gvn_->GetIFieldType(it->first) != type) {
         ++it;
       } else {
         it = aliasing_ifield_value_map_.erase(it);
@@ -1282,7 +1342,7 @@
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t field_id = gvn_->GetIFieldId(mir);
     uint16_t value = (opcode == Instruction::IPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1333,8 +1393,8 @@
     // Use result s_reg - will be unique.
     res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
   } else {
-    uint16_t type = opcode - Instruction::SGET;
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t type = SGetMemAccessType(static_cast<Instruction::Code>(opcode));
+    uint16_t field_id = gvn_->GetSFieldId(mir);
     auto lb = sfield_value_map_.lower_bound(field_id);
     if (lb != sfield_value_map_.end() && lb->first == field_id) {
       res = lb->second;
@@ -1362,7 +1422,7 @@
     // Class initialization can call arbitrary functions, we need to wipe aliasing values.
     HandleInvokeOrClInitOrAcquireOp(mir);
   }
-  uint16_t type = opcode - Instruction::SPUT;
+  uint16_t type = SPutMemAccessType(static_cast<Instruction::Code>(opcode));
   if (!field_info.IsResolved()) {
     // Unresolved fields always alias with everything of the same type.
     // Use mir->offset as modifier; without elaborate inlining, it will be unique.
@@ -1373,7 +1433,7 @@
     // Nothing to do, resolved volatile fields always get a new memory version anyway and
     // can't alias with resolved non-volatile fields.
   } else {
-    uint16_t field_id = gvn_->GetFieldId(field_info, type);
+    uint16_t field_id = gvn_->GetSFieldId(mir);
     uint16_t value = (opcode == Instruction::SPUT_WIDE)
                      ? GetOperandValueWide(mir->ssa_rep->uses[0])
                      : GetOperandValue(mir->ssa_rep->uses[0]);
@@ -1397,7 +1457,7 @@
 void LocalValueNumbering::RemoveSFieldsForType(uint16_t type) {
   // Erase all static fields of this type from the sfield_value_map_.
   for (auto it = sfield_value_map_.begin(), end = sfield_value_map_.end(); it != end; ) {
-    if (gvn_->GetFieldType(it->first) == type) {
+    if (gvn_->GetSFieldType(it->first) == type) {
       it = sfield_value_map_.erase(it);
     } else {
       ++it;
@@ -1696,6 +1756,13 @@
       }
       break;
 
+    case Instruction::DIV_INT:
+    case Instruction::DIV_INT_2ADDR:
+    case Instruction::REM_INT:
+    case Instruction::REM_INT_2ADDR:
+      HandleDivZeroCheck(mir, GetOperandValue(mir->ssa_rep->uses[1]));
+      FALLTHROUGH_INTENDED;
+
     case Instruction::CMPG_FLOAT:
     case Instruction::CMPL_FLOAT:
     case Instruction::ADD_INT:
@@ -1710,10 +1777,6 @@
     case Instruction::XOR_INT_2ADDR:
     case Instruction::SUB_INT:
     case Instruction::SUB_INT_2ADDR:
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
     case Instruction::SHL_INT:
     case Instruction::SHL_INT_2ADDR:
     case Instruction::SHR_INT:
@@ -1728,19 +1791,22 @@
       }
       break;
 
+    case Instruction::DIV_LONG:
+    case Instruction::REM_LONG:
+    case Instruction::DIV_LONG_2ADDR:
+    case Instruction::REM_LONG_2ADDR:
+      HandleDivZeroCheck(mir, GetOperandValueWide(mir->ssa_rep->uses[2]));
+      FALLTHROUGH_INTENDED;
+
     case Instruction::ADD_LONG:
     case Instruction::SUB_LONG:
     case Instruction::MUL_LONG:
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
     case Instruction::AND_LONG:
     case Instruction::OR_LONG:
     case Instruction::XOR_LONG:
     case Instruction::ADD_LONG_2ADDR:
     case Instruction::SUB_LONG_2ADDR:
     case Instruction::MUL_LONG_2ADDR:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
     case Instruction::AND_LONG_2ADDR:
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
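
HandleDivZeroCheck above follows the same pattern LVN already uses for null and range checks: a sorted set of divisor value names, consulted and extended with a lower_bound hint. A stripped-down model of the behavior (std::set stands in for the arena-backed ValueNameSet, and the flag constant is an illustrative placeholder for MIR_IGNORE_DIV_ZERO_CHECK):

    #include <cstdint>
    #include <set>

    constexpr int kIgnoreDivZeroCheck = 1 << 0;  // Placeholder flag bit.

    struct Mir { int optimization_flags = 0; };

    std::set<uint16_t> div_zero_checked;  // Value names of divisors already checked.

    void HandleDivZero(Mir* mir, uint16_t divisor) {
      auto lb = div_zero_checked.lower_bound(divisor);
      if (lb != div_zero_checked.end() && *lb == divisor) {
        // The divisor was already zero-checked; the new check is redundant.
        mir->optimization_flags |= kIgnoreDivZeroCheck;
      } else {
        div_zero_checked.insert(lb, divisor);  // lower_bound doubles as the insertion hint.
      }
    }

In practice this catches the common pair of x / y followed by x % y: the REM reuses the DIV's divisor value name, so its zero check is dropped. Note how the switch routes the divisor: uses[1] for the 32-bit forms, but uses[2] for the wide forms, where the dividend occupies two use slots.
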
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 979fd5a..9b89c95 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -22,6 +22,7 @@
 #include "compiler_internals.h"
 #include "global_value_numbering.h"
 #include "utils/arena_object.h"
+#include "utils/dex_instruction_utils.h"
 
 namespace art {
 
@@ -47,6 +48,10 @@
     return null_checked_.find(value_name) != null_checked_.end();
   }
 
+  bool IsValueDivZeroChecked(uint16_t value_name) const {
+    return div_zero_checked_.find(value_name) != div_zero_checked_.end();
+  }
+
   bool IsSregValue(uint16_t s_reg, uint16_t value_name) const {
     auto it = sreg_value_map_.find(s_reg);
     if (it != sreg_value_map_.end()) {
@@ -72,17 +77,6 @@
   // A set of value names.
   typedef GlobalValueNumbering::ValueNameSet ValueNameSet;
 
-  // Field types correspond to the ordering of GET/PUT instructions; this order is the same
-  // for IGET, IPUT, SGET, SPUT, AGET and APUT:
-  // op         0
-  // op_WIDE    1
-  // op_OBJECT  2
-  // op_BOOLEAN 3
-  // op_BYTE    4
-  // op_CHAR    5
-  // op_SHORT   6
-  static constexpr size_t kFieldTypeCount = 7;
-
   // Key is s_reg, value is value name.
   typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
 
@@ -286,6 +280,7 @@
   bool IsNonAliasingArray(uint16_t reg, uint16_t type) const;
   void HandleNullCheck(MIR* mir, uint16_t reg);
   void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index);
+  void HandleDivZeroCheck(MIR* mir, uint16_t reg);
   void HandlePutObject(MIR* mir);
   void HandleEscapingRef(uint16_t base);
   void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn);
@@ -337,6 +332,7 @@
   void MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
                                     IFieldLocToValueMap::iterator hint);
   void MergeNullChecked();
+  void MergeDivZeroChecked();
 
   template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
   void MergeAliasingValues(const typename Map::value_type& entry, typename Map::iterator hint);
@@ -358,8 +354,8 @@
 
   // Data for dealing with memory clobbering and store/load aliasing.
   uint16_t global_memory_version_;
-  uint16_t unresolved_sfield_version_[kFieldTypeCount];
-  uint16_t unresolved_ifield_version_[kFieldTypeCount];
+  uint16_t unresolved_sfield_version_[kDexMemAccessTypeCount];
+  uint16_t unresolved_ifield_version_[kDexMemAccessTypeCount];
   // Value names of references to objects that cannot be reached through a different value name.
   ValueNameSet non_aliasing_refs_;
   // Previously non-aliasing refs that escaped but can still be used for non-aliasing AGET/IGET.
@@ -371,6 +367,7 @@
   // Range check and null check elimination.
   RangeCheckSet range_checked_;
   ValueNameSet null_checked_;
+  ValueNameSet div_zero_checked_;
 
   // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack.
   ScopedArenaVector<BasicBlockId> merge_names_;
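
The removed kFieldTypeCount comment enumerated the same seven-way split that utils/dex_instruction_utils.h now provides centrally as DexMemAccessType, ordered to mirror the op/op_WIDE/op_OBJECT/... instruction layout. A paraphrased sketch of that header, for context:

    enum DexMemAccessType {
      kDexMemAccessWord,     // op
      kDexMemAccessWide,     // op_WIDE
      kDexMemAccessObject,   // op_OBJECT
      kDexMemAccessBoolean,  // op_BOOLEAN
      kDexMemAccessByte,     // op_BYTE
      kDexMemAccessChar,     // op_CHAR
      kDexMemAccessShort,    // op_SHORT
    };
    static constexpr size_t kDexMemAccessTypeCount = 7u;

So the unresolved_sfield_version_ and unresolved_ifield_version_ arrays keep their size of seven; only the name and the single point of definition of the constant change.
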
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index 824c323..0fcb584 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "compiler_internals.h"
+#include "dex/mir_field_info.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "gtest/gtest.h"
@@ -28,6 +29,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct SFieldDef {
@@ -35,6 +37,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_field_idx;
     bool is_volatile;
+    DexMemAccessType type;
   };
 
   struct MIRDef {
@@ -84,18 +87,21 @@
     { opcode, 0u, 0u, 1, { reg }, 0, { } }
 #define DEF_UNIQUE_REF(opcode, reg) \
     { opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
+#define DEF_DIV_REM(opcode, result, dividend, divisor) \
+    { opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
+#define DEF_DIV_REM_WIDE(opcode, result, dividend, divisor) \
+    { opcode, 0u, 0u, 4, { dividend, dividend + 1, divisor, divisor + 1 }, 2, { result, result + 1 } }
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = 0u |  // Without kFlagIsStatic.
-            (def->is_volatile ? MirIFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
     }
@@ -111,15 +117,14 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       // Mark even unresolved fields as initialized.
-      field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic |
-          MirSFieldLoweringInfo::kFlagClassIsInitialized;
+      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
       // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by LVN.
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ |= (def->is_volatile ? MirSFieldLoweringInfo::kFlagIsVolatile : 0u);
+        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
       }
       cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
     }
@@ -140,12 +145,16 @@
       mir->dalvikInsn.opcode = def->opcode;
       mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
       mir->dalvikInsn.vB_wide = def->value;
-      if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_info;
-      } else if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
         ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
         mir->meta.sfield_lowering_info = def->field_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
       }
       mir->ssa_rep = &ssa_reps_[i];
       mir->ssa_rep->num_uses = def->num_uses;
@@ -177,6 +186,13 @@
   }
 
   void PerformLVN() {
+    cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
+    cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
+    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
+                                                           GlobalValueNumbering::kModeLvn));
+    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u, allocator_.get()));
     value_names_.resize(mir_count_);
     for (size_t i = 0; i != mir_count_; ++i) {
       value_names_[i] =  lvn_->GetValueNumber(&mirs_[i]);
@@ -196,9 +212,6 @@
         value_names_() {
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
-                                                           GlobalValueNumbering::kModeLvn));
-    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u, allocator_.get()));
   }
 
   ArenaPool pool_;
@@ -214,7 +227,7 @@
 
 TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
@@ -237,8 +250,8 @@
 
 TEST_F(LocalValueNumberingTest, IGetIPutIGetIGetIGet) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessObject },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET_OBJECT, 0u, 10u, 0u),
@@ -262,7 +275,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserve1) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
@@ -284,7 +297,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserve2) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 11u),
@@ -306,7 +319,7 @@
 
 TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
@@ -331,8 +344,8 @@
 
 TEST_F(LocalValueNumberingTest, Volatile) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, true },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, true, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 10u, 1u),  // Volatile.
@@ -358,9 +371,9 @@
 
 TEST_F(LocalValueNumberingTest, UnresolvedIField) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2.
-      { 3u, 0u, 0u, false },  // Unresolved field.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
+      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
@@ -407,9 +420,9 @@
 
 TEST_F(LocalValueNumberingTest, UnresolvedSField) {
   static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2.
-      { 3u, 0u, 0u, false },  // Unresolved field.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
+      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET, 0u, 0u),            // Resolved field #1.
@@ -438,11 +451,11 @@
 
 TEST_F(LocalValueNumberingTest, UninitializedSField) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
   };
   static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false },  // Resolved field #1.
-      { 2u, 1u, 2u, false },  // Resolved field #2; uninitialized.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWord },  // Resolved field #2; uninitialized.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 200u),
@@ -487,11 +500,11 @@
 
 TEST_F(LocalValueNumberingTest, SameValueInDifferentMemoryLocations) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 3u, 1u, 3u, false },
+      { 3u, 1u, 3u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 201u),
@@ -551,12 +564,12 @@
 
 TEST_F(LocalValueNumberingTest, EscapingRefs) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },  // Field #1.
-      { 2u, 1u, 2u, false },  // Field #2.
-      { 3u, 1u, 3u, false },  // Reference field for storing escaping refs.
-      { 4u, 1u, 4u, false },  // Wide.
-      { 5u, 0u, 0u, false },  // Unresolved field, int.
-      { 6u, 0u, 0u, false },  // Unresolved field, wide.
+      { 1u, 1u, 1u, false, kDexMemAccessWord },    // Field #1.
+      { 2u, 1u, 2u, false, kDexMemAccessWord },    // Field #2.
+      { 3u, 1u, 3u, false, kDexMemAccessObject },  // For storing escaping refs.
+      { 4u, 1u, 4u, false, kDexMemAccessWide },    // Wide.
+      { 5u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved field, int.
+      { 6u, 0u, 0u, false, kDexMemAccessWide },    // Unresolved field, wide.
   };
   static const MIRDef mirs[] = {
       DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
@@ -594,7 +607,9 @@
   EXPECT_NE(value_names_[13], value_names_[16]);  // New value.
   EXPECT_NE(value_names_[14], value_names_[17]);  // New value.
   for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = (i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0;
+    int expected =
+        ((i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0) |
+        ((i == 3u) ? MIR_STORE_NON_NULL_VALUE : 0);
     EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
   }
 }
@@ -627,18 +642,19 @@
   for (size_t i = 0u; i != mir_count_; ++i) {
     int expected =
         ((i != 0u && i != 3u && i != 6u && i != 9u) ? MIR_IGNORE_NULL_CHECK : 0u) |
-        ((i >= 4 && i != 6u && i != 9u) ? MIR_IGNORE_RANGE_CHECK : 0u);
+        ((i >= 4 && i != 6u && i != 9u) ? MIR_IGNORE_RANGE_CHECK : 0u) |
+        ((i == 3u) ? MIR_STORE_NON_NULL_VALUE : 0u);
     EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
   }
 }
 
 TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) {
   static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false },
-      { 2u, 1u, 2u, false },
+      { 1u, 1u, 1u, false, kDexMemAccessWord },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const SFieldDef sfields[] = {
-      { 2u, 1u, 2u, false },
+      { 2u, 1u, 2u, false, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET(Instruction::IGET, 0u, 30u, 0u),
@@ -716,8 +732,8 @@
 
 TEST_F(LocalValueNumberingTest, ClInitOnSget) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false },
-      { 1u, 2u, 1u, false },
+      { 0u, 1u, 0u, false, kDexMemAccessObject },
+      { 1u, 2u, 1u, false, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET_OBJECT, 0u, 0u),
@@ -735,4 +751,26 @@
   EXPECT_NE(value_names_[0], value_names_[3]);
 }
 
+TEST_F(LocalValueNumberingTest, DivZeroCheck) {
+  static const MIRDef mirs[] = {
+      DEF_DIV_REM(Instruction::DIV_INT, 1u, 10u, 20u),
+      DEF_DIV_REM(Instruction::DIV_INT, 2u, 20u, 20u),
+      DEF_DIV_REM(Instruction::DIV_INT_2ADDR, 3u, 10u, 1u),
+      DEF_DIV_REM(Instruction::REM_INT, 4u, 30u, 20u),
+      DEF_DIV_REM_WIDE(Instruction::REM_LONG, 5u, 12u, 14u),
+      DEF_DIV_REM_WIDE(Instruction::DIV_LONG_2ADDR, 7u, 16u, 14u),
+  };
+
+  static const bool expected_ignore_div_zero_check[] = {
+      false, true, false, true, false, true,
+  };
+
+  PrepareMIRs(mirs);
+  PerformLVN();
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
 }  // namespace art
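
A minimal standalone sketch of what the new DivZeroCheck test exercises (invented names, not ART code): once a divisor's value name has been zero-checked, a later division by the same value name can take MIR_IGNORE_DIV_ZERO_CHECK. The vreg-to-value-name mapping below is a stand-in for real value numbering.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>

    int main() {
      std::unordered_map<std::string, uint16_t> value_names;  // operand -> value name
      std::unordered_set<uint16_t> checked;  // value names already zero-checked
      uint16_t next_name = 0u;
      auto NameOf = [&](const std::string& vreg) {
        auto it = value_names.find(vreg);
        if (it == value_names.end()) {
          it = value_names.emplace(vreg, next_name++).first;
        }
        return it->second;
      };
      // Mirrors the divisor operands of the six DIV/REM MIRs in the test above.
      const char* divisors[] = {"v20", "v20", "v1", "v20", "v14", "v14"};
      for (const char* divisor : divisors) {
        // A second insert of the same value name fails, marking the check redundant.
        bool redundant = !checked.insert(NameOf(divisor)).second;
        std::cout << divisor << ": " << (redundant ? "ignore check" : "must check") << "\n";
      }
      return 0;
    }

Run on these operands it prints must/ignore in the same false, true, false, true, false, true pattern the test expects.
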
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 44f69ba..7b53b14 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -21,6 +21,7 @@
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
+#include "dex/mir_field_info.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
@@ -1204,6 +1205,8 @@
   ScopedArenaAllocator allocator(&cu_->arena_stack);
   uint16_t* field_idxs =
       reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc));
+  DexMemAccessType* field_types = reinterpret_cast<DexMemAccessType*>(
+      allocator.Alloc(max_refs * sizeof(DexMemAccessType), kArenaAllocMisc));
 
   // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end.
   size_t ifield_pos = 0u;
@@ -1214,38 +1217,41 @@
       continue;
     }
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->dalvikInsn.opcode >= Instruction::IGET &&
-          mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
-        // Get field index and try to find it among existing indexes. If found, it's usually among
-        // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this
-        // is a linear search, it actually performs much better than map based approach.
-        if (mir->dalvikInsn.opcode <= Instruction::IPUT_SHORT) {
-          uint16_t field_idx = mir->dalvikInsn.vC;
-          size_t i = ifield_pos;
-          while (i != 0u && field_idxs[i - 1] != field_idx) {
-            --i;
-          }
-          if (i != 0u) {
-            mir->meta.ifield_lowering_info = i - 1;
-          } else {
-            mir->meta.ifield_lowering_info = ifield_pos;
-            field_idxs[ifield_pos++] = field_idx;
-          }
-        } else {
-          uint16_t field_idx = mir->dalvikInsn.vB;
-          size_t i = sfield_pos;
-          while (i != max_refs && field_idxs[i] != field_idx) {
-            ++i;
-          }
-          if (i != max_refs) {
-            mir->meta.sfield_lowering_info = max_refs - i - 1u;
-          } else {
-            mir->meta.sfield_lowering_info = max_refs - sfield_pos;
-            field_idxs[--sfield_pos] = field_idx;
-          }
+      // Get field index and try to find it among existing indexes. If found, it's usually among
+      // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this
+      // is a linear search, it actually performs much better than a map-based approach.
+      if (IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)) {
+        uint16_t field_idx = mir->dalvikInsn.vC;
+        size_t i = ifield_pos;
+        while (i != 0u && field_idxs[i - 1] != field_idx) {
+          --i;
         }
-        DCHECK_LE(ifield_pos, sfield_pos);
+        if (i != 0u) {
+          mir->meta.ifield_lowering_info = i - 1;
+          DCHECK_EQ(field_types[i - 1], IGetOrIPutMemAccessType(mir->dalvikInsn.opcode));
+        } else {
+          mir->meta.ifield_lowering_info = ifield_pos;
+          field_idxs[ifield_pos] = field_idx;
+          field_types[ifield_pos] = IGetOrIPutMemAccessType(mir->dalvikInsn.opcode);
+          ++ifield_pos;
+        }
+      } else if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
+        uint16_t field_idx = mir->dalvikInsn.vB;
+        size_t i = sfield_pos;
+        while (i != max_refs && field_idxs[i] != field_idx) {
+          ++i;
+        }
+        if (i != max_refs) {
+          mir->meta.sfield_lowering_info = max_refs - i - 1u;
+          DCHECK_EQ(field_types[i], SGetOrSPutMemAccessType(mir->dalvikInsn.opcode));
+        } else {
+          mir->meta.sfield_lowering_info = max_refs - sfield_pos;
+          --sfield_pos;
+          field_idxs[sfield_pos] = field_idx;
+          field_types[sfield_pos] = SGetOrSPutMemAccessType(mir->dalvikInsn.opcode);
+        }
       }
+      DCHECK_LE(ifield_pos, sfield_pos);
     }
   }
 
@@ -1254,7 +1260,7 @@
     DCHECK_EQ(ifield_lowering_infos_.size(), 0u);
     ifield_lowering_infos_.reserve(ifield_pos);
     for (size_t pos = 0u; pos != ifield_pos; ++pos) {
-      ifield_lowering_infos_.push_back(MirIFieldLoweringInfo(field_idxs[pos]));
+      ifield_lowering_infos_.push_back(MirIFieldLoweringInfo(field_idxs[pos], field_types[pos]));
     }
     MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
                                    ifield_lowering_infos_.data(), ifield_pos);
@@ -1266,7 +1272,7 @@
     sfield_lowering_infos_.reserve(max_refs - sfield_pos);
     for (size_t pos = max_refs; pos != sfield_pos;) {
       --pos;
-      sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos]));
+      sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos], field_types[pos]));
     }
     MirSFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
                                    sfield_lowering_infos_.data(), max_refs - sfield_pos);
@@ -1329,19 +1335,10 @@
       continue;
     }
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->dalvikInsn.opcode >= Instruction::INVOKE_VIRTUAL &&
-          mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE_RANGE &&
-          mir->dalvikInsn.opcode != Instruction::RETURN_VOID_BARRIER) {
+      if (IsInstructionInvoke(mir->dalvikInsn.opcode)) {
         // Decode target method index and invoke type.
-        uint16_t target_method_idx;
-        uint16_t invoke_type_idx;
-        if (mir->dalvikInsn.opcode <= Instruction::INVOKE_INTERFACE) {
-          target_method_idx = mir->dalvikInsn.vB;
-          invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL;
-        } else {
-          target_method_idx = mir->dalvikInsn.vB;
-          invoke_type_idx = mir->dalvikInsn.opcode - Instruction::INVOKE_VIRTUAL_RANGE;
-        }
+        uint16_t target_method_idx = mir->dalvikInsn.vB;
+        DexInvokeType invoke_type_idx = InvokeInstructionType(mir->dalvikInsn.opcode);
 
         // Find devirtualization target.
         // TODO: The devirt map is ordered by the dex pc here. Is there a way to get INVOKEs
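
For context, a hedged sketch of the two-ended packing the field-gathering loop above relies on (simplified, not the MIRGraph code): instance-field indexes grow from the front of a single buffer, static-field indexes grow from the back, and lookup is a short linear scan starting at the packing position so recently added entries are found first.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    int main() {
      constexpr size_t kMaxRefs = 8;
      uint16_t field_idxs[kMaxRefs] = {};
      size_t ifield_pos = 0u;        // one past the last instance-field slot
      size_t sfield_pos = kMaxRefs;  // first static-field slot

      auto AddIField = [&](uint16_t idx) -> size_t {
        size_t i = ifield_pos;
        while (i != 0u && field_idxs[i - 1] != idx) {
          --i;  // recently added entries are checked first
        }
        if (i != 0u) {
          return i - 1;  // found an existing slot
        }
        field_idxs[ifield_pos] = idx;
        return ifield_pos++;
      };
      auto AddSField = [&](uint16_t idx) -> size_t {
        size_t i = sfield_pos;
        while (i != kMaxRefs && field_idxs[i] != idx) {
          ++i;
        }
        if (i != kMaxRefs) {
          return kMaxRefs - i - 1u;  // found an existing slot
        }
        size_t info = kMaxRefs - sfield_pos;
        field_idxs[--sfield_pos] = idx;
        return info;
      };

      assert(AddIField(7u) == 0u);
      assert(AddIField(9u) == 1u);
      assert(AddIField(7u) == 0u);  // reused, no new slot
      assert(AddSField(3u) == 0u);
      assert(AddSField(3u) == 0u);  // reused slot at the back
      assert(ifield_pos <= sfield_pos);
      std::cout << ifield_pos << " ifields, " << (kMaxRefs - sfield_pos) << " sfields\n";
      return 0;
    }
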
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index 1db3b5b..53afcad 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -35,7 +35,7 @@
     DCHECK(field_infos != nullptr);
     DCHECK_NE(count, 0u);
     for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirIFieldLoweringInfo unresolved(it->field_idx_);
+      MirIFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType());
       DCHECK_EQ(memcmp(&unresolved, &*it, sizeof(*it)), 0);
     }
   }
@@ -66,6 +66,7 @@
     std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
         dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx);
     it->flags_ = 0u |  // Without kFlagIsStatic.
+        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
         (fast_path.second ? kFlagFastPut : 0u);
@@ -79,7 +80,7 @@
     DCHECK(field_infos != nullptr);
     DCHECK_NE(count, 0u);
     for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirSFieldLoweringInfo unresolved(it->field_idx_);
+      MirSFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType());
       // In 64-bit builds, there's padding after storage_index_, don't include it in memcmp.
       size_t size = OFFSETOF_MEMBER(MirSFieldLoweringInfo, storage_index_) +
           sizeof(it->storage_index_);
@@ -114,6 +115,7 @@
     std::pair<bool, bool> fast_path = compiler_driver->IsFastStaticField(
         dex_cache.Get(), referrer_class, resolved_field, field_idx, &it->storage_index_);
     uint16_t flags = kFlagIsStatic |
+        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
         (fast_path.second ? kFlagFastPut : 0u);
diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h
index e97f7a0..ff427f8 100644
--- a/compiler/dex/mir_field_info.h
+++ b/compiler/dex/mir_field_info.h
@@ -20,6 +20,7 @@
 #include "base/macros.h"
 #include "dex_file.h"
 #include "offsets.h"
+#include "utils/dex_instruction_utils.h"
 
 namespace art {
 
@@ -63,18 +64,27 @@
     return (flags_ & kFlagIsVolatile) != 0u;
   }
 
+  DexMemAccessType MemAccessType() const {
+    return static_cast<DexMemAccessType>((flags_ >> kBitMemAccessTypeBegin) & kMemAccessTypeMask);
+  }
+
  protected:
   enum {
     kBitIsStatic = 0,
     kBitIsVolatile,
-    kFieldInfoBitEnd
+    kBitMemAccessTypeBegin,
+    kBitMemAccessTypeEnd = kBitMemAccessTypeBegin + 3,  // 3 bits for raw type.
+    kFieldInfoBitEnd = kBitMemAccessTypeEnd
   };
   static constexpr uint16_t kFlagIsVolatile = 1u << kBitIsVolatile;
   static constexpr uint16_t kFlagIsStatic = 1u << kBitIsStatic;
+  static constexpr uint16_t kMemAccessTypeMask = 7u;
+  static_assert((1u << (kBitMemAccessTypeEnd - kBitMemAccessTypeBegin)) - 1u == kMemAccessTypeMask,
+                "Invalid raw type mask");
 
-  MirFieldInfo(uint16_t field_idx, uint16_t flags)
+  MirFieldInfo(uint16_t field_idx, uint16_t flags, DexMemAccessType type)
       : field_idx_(field_idx),
-        flags_(flags),
+        flags_(flags | static_cast<uint16_t>(type) << kBitMemAccessTypeBegin),
         declaring_field_idx_(0u),
         declaring_class_idx_(0u),
         declaring_dex_file_(nullptr) {
@@ -107,8 +117,8 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Construct an unresolved instance field lowering info.
-  explicit MirIFieldLoweringInfo(uint16_t field_idx)
-      : MirFieldInfo(field_idx, kFlagIsVolatile),  // Without kFlagIsStatic.
+  explicit MirIFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type)
+      : MirFieldInfo(field_idx, kFlagIsVolatile, type),  // Without kFlagIsStatic.
         field_offset_(0u) {
   }
 
@@ -155,8 +165,8 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Construct an unresolved static field lowering info.
-  explicit MirSFieldLoweringInfo(uint16_t field_idx)
-      : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic),
+  explicit MirSFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type)
+      : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic, type),
         field_offset_(0u),
         storage_index_(DexFile::kDexNoIndex) {
   }
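
A minimal sketch of the flag packing introduced in MirFieldInfo (the DexMemAccessType enumerator order here is an assumption for illustration): two flag bits, then three bits holding the raw access type, read back with a shift and mask.

    #include <cassert>
    #include <cstdint>

    enum DexMemAccessType : uint16_t {
      kDexMemAccessWord = 0,
      kDexMemAccessWide,
      kDexMemAccessObject,
      kDexMemAccessBoolean,
      kDexMemAccessByte,
      kDexMemAccessChar,
      kDexMemAccessShort,
    };

    enum : uint16_t {
      kBitIsStatic = 0,
      kBitIsVolatile,
      kBitMemAccessTypeBegin,
      kBitMemAccessTypeEnd = kBitMemAccessTypeBegin + 3,  // 3 bits for the raw type.
    };
    constexpr uint16_t kFlagIsStatic = 1u << kBitIsStatic;
    constexpr uint16_t kFlagIsVolatile = 1u << kBitIsVolatile;
    constexpr uint16_t kMemAccessTypeMask = 7u;
    static_assert((1u << (kBitMemAccessTypeEnd - kBitMemAccessTypeBegin)) - 1u == kMemAccessTypeMask,
                  "Invalid raw type mask");

    constexpr uint16_t PackFlags(uint16_t flags, DexMemAccessType type) {
      return static_cast<uint16_t>(flags | (static_cast<uint16_t>(type) << kBitMemAccessTypeBegin));
    }
    constexpr DexMemAccessType UnpackType(uint16_t flags) {
      return static_cast<DexMemAccessType>((flags >> kBitMemAccessTypeBegin) & kMemAccessTypeMask);
    }

    int main() {
      uint16_t flags = PackFlags(kFlagIsStatic | kFlagIsVolatile, kDexMemAccessWide);
      assert(UnpackType(flags) == kDexMemAccessWide);
      assert((flags & kFlagIsVolatile) != 0u);  // other bits are preserved
      return 0;
    }
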
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index f69d63c..023abca 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -258,8 +258,6 @@
   DCHECK(insn != orig_block->first_mir_insn);
   DCHECK(insn == bottom_block->first_mir_insn);
   DCHECK_EQ(insn->offset, bottom_block->start_offset);
-  DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
-         !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode));
   DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id);
   // Scan the "bottom" instructions, remapping them to the
   // newly created "bottom" block.
@@ -767,8 +765,9 @@
       } else {
         DCHECK(cur_block->fall_through == NullBasicBlockId);
         DCHECK(cur_block->taken == NullBasicBlockId);
-        // Unreachable instruction, mark for no continuation.
+        // Unreachable instruction, mark for no continuation and end the basic block.
         flags &= ~Instruction::kContinue;
+        FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr);
       }
     } else {
       cur_block->AppendMIR(insn);
@@ -915,7 +914,7 @@
                 bb->first_mir_insn ? " | " : " ");
         for (mir = bb->first_mir_insn; mir; mir = mir->next) {
             int opcode = mir->dalvikInsn.opcode;
-            fprintf(file, "    {%04x %s %s %s %s %s %s %s %s\\l}%s\\\n", mir->offset,
+            fprintf(file, "    {%04x %s %s %s %s %s %s %s %s %s\\l}%s\\\n", mir->offset,
                       mir->ssa_rep ? GetDalvikDisassembly(mir) :
                       !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
                         Instruction::Name(mir->dalvikInsn.opcode) :
@@ -927,6 +926,7 @@
                       (mir->optimization_flags & MIR_CALLEE) != 0 ? " inlined" : " ",
                       (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0 ? " cl_inited" : " ",
                       (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0 ? " cl_in_cache" : " ",
+                      (mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) != 0 ? " no_div_check" : " ",
                       mir->next ? " | " : " ");
         }
         fprintf(file, "  }\"];\n\n");
@@ -1169,6 +1169,14 @@
   return true;
 }
 
+MIR* BasicBlock::GetFirstNonPhiInsn() {
+  MIR* mir = first_mir_insn;
+  while (mir != nullptr && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
+    mir = mir->next;
+  }
+  return mir;
+}
+
 MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
   MIR* next_mir = nullptr;
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index d77ad6f..1a18841 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -145,9 +145,8 @@
 #define INVALID_OFFSET (0xDEADF00FU)
 
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
-#define MIR_NULL_CHECK_ONLY             (1 << kMIRNullCheckOnly)
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
-#define MIR_RANGE_CHECK_ONLY            (1 << kMIRRangeCheckOnly)
+#define MIR_STORE_NON_NULL_VALUE        (1 << kMIRStoreNonNullValue)
 #define MIR_CLASS_IS_INITIALIZED        (1 << kMIRClassIsInitialized)
 #define MIR_CLASS_IS_IN_DEX_CACHE       (1 << kMIRClassIsInDexCache)
 #define MIR_IGNORE_DIV_ZERO_CHECK       (1 << kMirIgnoreDivZeroCheck)
@@ -444,6 +443,11 @@
   void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred);
 
   /**
+   * @brief Return the first non-Phi insn.
+   */
+  MIR* GetFirstNonPhiInsn();
+
+  /**
    * @brief Used to obtain the next MIR that follows unconditionally.
    * @details The implementation does not guarantee that a MIR does not
    * follow even if this method returns nullptr.
@@ -661,13 +665,29 @@
   void DoCacheFieldLoweringInfo();
 
   const MirIFieldLoweringInfo& GetIFieldLoweringInfo(MIR* mir) const {
-    DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
-    return ifield_lowering_infos_[mir->meta.ifield_lowering_info];
+    return GetIFieldLoweringInfo(mir->meta.ifield_lowering_info);
+  }
+
+  const MirIFieldLoweringInfo& GetIFieldLoweringInfo(uint32_t lowering_info) const {
+    DCHECK_LT(lowering_info, ifield_lowering_infos_.size());
+    return ifield_lowering_infos_[lowering_info];
+  }
+
+  size_t GetIFieldLoweringInfoCount() const {
+    return ifield_lowering_infos_.size();
   }
 
   const MirSFieldLoweringInfo& GetSFieldLoweringInfo(MIR* mir) const {
-    DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
-    return sfield_lowering_infos_[mir->meta.sfield_lowering_info];
+    return GetSFieldLoweringInfo(mir->meta.sfield_lowering_info);
+  }
+
+  const MirSFieldLoweringInfo& GetSFieldLoweringInfo(uint32_t lowering_info) const {
+    DCHECK_LT(lowering_info, sfield_lowering_infos_.size());
+    return sfield_lowering_infos_[lowering_info];
+  }
+
+  size_t GetSFieldLoweringInfoCount() const {
+    return sfield_lowering_infos_.size();
   }
 
   void DoCacheMethodLoweringInfo();
@@ -1035,6 +1055,21 @@
   bool ApplyGlobalValueNumberingGate();
   bool ApplyGlobalValueNumbering(BasicBlock* bb);
   void ApplyGlobalValueNumberingEnd();
+
+  uint16_t GetGvnIFieldId(MIR* mir) const {
+    DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
+    DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
+    DCHECK(temp_.gvn.ifield_ids_ != nullptr);
+    return temp_.gvn.ifield_ids_[mir->meta.ifield_lowering_info];
+  }
+
+  uint16_t GetGvnSFieldId(MIR* mir) const {
+    DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode));
+    DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
+    DCHECK(temp_.gvn.sfield_ids_ != nullptr);
+    return temp_.gvn.sfield_ids_[mir->meta.sfield_lowering_info];
+  }
+
   /*
    * Type inference handling helpers.  Because Dalvik's bytecode is not fully typed,
    * we have to do some work to figure out the sreg type.  For some operations it is
@@ -1300,6 +1335,8 @@
     // Global value numbering.
     struct {
       GlobalValueNumbering* gvn;
+      uint16_t* ifield_ids_;  // Part of GVN/LVN but cached here for LVN to avoid recalculation.
+      uint16_t* sfield_ids_;  // Ditto.
     } gvn;
   } temp_;
   static const int kInvalidEntry = -1;
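
A speculative sketch of what the cached GVN field ids buy (the merging rule below is an assumption, not a quote of PrepareGvnFieldIds): lowering infos that resolve to the same declaring field collapse to a single id, so LVN and GVN can treat them as one memory location without re-resolving each time.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      // (declaring dex file, declaring field idx) pairs, one per lowering info.
      std::vector<std::pair<int, int>> infos = {{1, 7}, {1, 9}, {1, 7}};
      std::map<std::pair<int, int>, uint16_t> id_map;
      std::vector<uint16_t> field_ids;  // plays the role of temp_.gvn.ifield_ids_
      for (const auto& key : infos) {
        auto it = id_map.emplace(key, static_cast<uint16_t>(id_map.size())).first;
        field_ids.push_back(it->second);
      }
      for (uint16_t id : field_ids) {
        std::cout << id << " ";  // prints: 0 1 0 (first and third share an id)
      }
      std::cout << "\n";
      return 0;
    }
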
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index d025d08..1f630f7 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -19,6 +19,7 @@
 #include "dataflow_iterator-inl.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
+#include "mir_field_info.h"
 #include "quick/dex_file_method_inliner.h"
 #include "quick/dex_file_to_method_inliner_map.h"
 #include "stack.h"
@@ -217,10 +218,6 @@
 static_assert(arraysize(kIfCcZConditionCodes) == Instruction::IF_LEZ - Instruction::IF_EQZ + 1,
               "if_ccz_ccodes_size1");
 
-static constexpr bool IsInstructionIfCcZ(Instruction::Code opcode) {
-  return Instruction::IF_EQZ <= opcode && opcode <= Instruction::IF_LEZ;
-}
-
 static constexpr ConditionCode ConditionCodeForIfCcZ(Instruction::Code opcode) {
   return kIfCcZConditionCodes[opcode - Instruction::IF_EQZ];
 }
@@ -480,29 +477,25 @@
             }
           }
           break;
-        case Instruction::GOTO:
-        case Instruction::GOTO_16:
-        case Instruction::GOTO_32:
-        case Instruction::IF_EQ:
-        case Instruction::IF_NE:
-        case Instruction::IF_LT:
-        case Instruction::IF_GE:
-        case Instruction::IF_GT:
-        case Instruction::IF_LE:
-        case Instruction::IF_EQZ:
-        case Instruction::IF_NEZ:
-        case Instruction::IF_LTZ:
-        case Instruction::IF_GEZ:
-        case Instruction::IF_GTZ:
-        case Instruction::IF_LEZ:
-          // If we've got a backwards branch to return, no need to suspend check.
-          if ((IsBackedge(bb, bb->taken) && GetBasicBlock(bb->taken)->dominates_return) ||
-              (IsBackedge(bb, bb->fall_through) &&
-                          GetBasicBlock(bb->fall_through)->dominates_return)) {
-            mir->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
-            if (cu_->verbose) {
-              LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
-                        << mir->offset;
+        case Instruction::RETURN_VOID:
+        case Instruction::RETURN:
+        case Instruction::RETURN_WIDE:
+        case Instruction::RETURN_OBJECT:
+          if (bb->GetFirstNonPhiInsn() == mir) {
+            // This is a simple return BB. Eliminate suspend checks on predecessor back-edges.
+            for (BasicBlockId pred_id : bb->predecessors) {
+              BasicBlock* pred_bb = GetBasicBlock(pred_id);
+              DCHECK(pred_bb != nullptr);
+              if (IsBackedge(pred_bb, bb->id) && pred_bb->last_mir_insn != nullptr &&
+                  (IsInstructionIfCc(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
+                   IsInstructionIfCcZ(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
+                   IsInstructionGoto(pred_bb->last_mir_insn->dalvikInsn.opcode))) {
+                pred_bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+                if (cu_->verbose) {
+                  LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
+                            << pred_bb->last_mir_insn->offset;
+                }
+              }
             }
           }
           break;
@@ -801,17 +794,18 @@
     BasicBlock* bb_next = GetBasicBlock(bb->fall_through);
     DCHECK(!bb_next->catch_entry);
     DCHECK_EQ(bb_next->predecessors.size(), 1u);
-    // Overwrite the kMirOpCheck insn with the paired opcode.
+
+    // Now move instructions from bb_next to bb. Start off with a sanity check
+    // that the throw instruction paired with the kMirOpCheck is the first one in bb_next.
     DCHECK_EQ(bb_next->first_mir_insn, throw_insn);
-    *bb->last_mir_insn = *throw_insn;
-    // And grab the rest of the instructions from bb_next.
-    bb->last_mir_insn = bb_next->last_mir_insn;
-    throw_insn->next = nullptr;
-    bb_next->last_mir_insn = throw_insn;
-    // Mark acquired instructions as belonging to bb.
-    for (MIR* insn = mir; insn != nullptr; insn = insn->next) {
-      insn->bb = bb->id;
-    }
+    // Move all instructions (from the throw instruction to the last one) from bb_next to bb.
+    MIR* last_to_move = bb_next->last_mir_insn;
+    bb_next->RemoveMIRList(throw_insn, last_to_move);
+    bb->InsertMIRListAfter(bb->last_mir_insn, throw_insn, last_to_move);
+    // The kMirOpCheck instruction is not needed anymore.
+    mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+    bb->RemoveMIR(mir);
+
     // Before we overwrite successors, remove their predecessor links to bb.
     bb_next->ErasePredecessor(bb->id);
     if (bb->taken != NullBasicBlockId) {
@@ -891,7 +885,7 @@
 
   DCHECK(temp_scoped_alloc_.get() == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_.nce.num_vregs = GetNumOfCodeVRs();
+  temp_.nce.num_vregs = GetNumOfCodeAndTempVRs();
   temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
       temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
   temp_.nce.ending_vregs_to_check_matrix = static_cast<ArenaBitVector**>(
@@ -979,7 +973,10 @@
   for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     uint64_t df_attributes = GetDataFlowAttributes(mir);
 
-    DCHECK_EQ(df_attributes & DF_NULL_TRANSFER_N, 0u);  // No Phis yet.
+    if ((df_attributes & DF_NULL_TRANSFER_N) != 0u) {
+      // The algorithm was written in a Phi-agnostic way; simply skip Phi nodes here.
+      continue;
+    }
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
@@ -1159,8 +1156,7 @@
     for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
       if (bb->block_type == kDalvikByteCode) {
         for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          if (mir->dalvikInsn.opcode >= Instruction::SGET &&
-              mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
+          if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
             const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir);
             if (!field_info.IsReferrersClass()) {
               DCHECK_LT(class_to_index_map.size(), 0xffffu);
@@ -1176,8 +1172,7 @@
               // Using offset/2 for index into temp_.cice.indexes.
               temp_.cice.indexes[mir->offset / 2u] = index;
             }
-          } else if (mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-              mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE) {
+          } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
             const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
             DCHECK(method_info.IsStatic());
             if (method_info.FastPath() && !method_info.IsReferrersClass()) {
@@ -1261,12 +1256,10 @@
       // NOTE: index != 0xffff does not guarantee that this is an SGET/SPUT/INVOKE_STATIC.
       // Dex instructions with width 1 can have the same offset/2.
 
-      if (mir->dalvikInsn.opcode >= Instruction::SGET &&
-          mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) {
+      if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
         check_initialization = true;
         check_dex_cache = true;
-      } else if (mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-               mir->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE) {
+      } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
         check_initialization = true;
         // NOTE: INVOKE_STATIC doesn't guarantee that the type will be in the dex cache.
       }
@@ -1333,6 +1326,10 @@
 
   DCHECK(temp_scoped_alloc_ == nullptr);
   temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+  temp_.gvn.ifield_ids_ =
+      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
+  temp_.gvn.sfield_ids_ =
+      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
   DCHECK(temp_.gvn.gvn == nullptr);
   temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering(
       cu_, temp_scoped_alloc_.get(), GlobalValueNumbering::kModeGvn);
@@ -1378,6 +1375,8 @@
 
   delete temp_.gvn.gvn;
   temp_.gvn.gvn = nullptr;
+  temp_.gvn.ifield_ids_ = nullptr;
+  temp_.gvn.sfield_ids_ = nullptr;
   DCHECK(temp_scoped_alloc_ != nullptr);
   temp_scoped_alloc_.reset();
 }
@@ -1396,7 +1395,8 @@
       cu_, cu_->class_loader, cu_->class_linker, *target.dex_file,
       nullptr /* code_item not used */, 0u /* class_def_idx not used */, target.dex_method_index,
       0u /* access_flags not used */, nullptr /* verified_method not used */);
-  MirIFieldLoweringInfo inlined_field_info(field_idx);
+  DexMemAccessType type = IGetOrIPutMemAccessType(iget_or_iput->dalvikInsn.opcode);
+  MirIFieldLoweringInfo inlined_field_info(field_idx, type);
   MirIFieldLoweringInfo::Resolve(cu_->compiler_driver, &inlined_unit, &inlined_field_info, 1u);
   DCHECK(inlined_field_info.IsResolved());
 
@@ -1544,6 +1544,14 @@
 }
 
 void MIRGraph::BasicBlockOptimization() {
+  if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) {
+    temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+    temp_.gvn.ifield_ids_ =
+        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
+    temp_.gvn.sfield_ids_ =
+        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
+  }
+
   if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
     ClearAllVisitedFlags();
     PreOrderDfsIterator iter2(this);
@@ -1560,6 +1568,11 @@
       BasicBlockOpt(bb);
     }
   }
+
+  // Clean up after LVN.
+  temp_.gvn.ifield_ids_ = nullptr;
+  temp_.gvn.sfield_ids_ = nullptr;
+  temp_scoped_alloc_.reset();
 }
 
 }  // namespace art
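
A rough model of the reworked suspend-check elimination (stand-in types, not MIRGraph): when a block is nothing but Phis followed by a return, every back-edge predecessor ending in a goto or compare-and-branch may take MIR_IGNORE_SUSPEND_CHECK, because the loop is about to exit anyway.

    #include <iostream>
    #include <vector>

    struct Mir { bool is_phi; bool is_return; bool is_branch; int flags; };
    struct Block { int id; std::vector<Mir> insns; std::vector<Block*> preds; };
    constexpr int kIgnoreSuspendCheck = 1;

    Mir* GetFirstNonPhiInsn(Block* bb) {
      for (Mir& mir : bb->insns) {
        if (!mir.is_phi) {
          return &mir;
        }
      }
      return nullptr;
    }

    int main() {
      Block loop{1, {{false, false, true, 0}}, {}};   // ends in a conditional branch
      Block ret{2,
                {{true, false, false, 0},             // Phi
                 {false, true, false, 0}},            // RETURN
                {&loop}};
      Mir* first = GetFirstNonPhiInsn(&ret);
      if (first != nullptr && first->is_return) {     // simple return block
        for (Block* pred : ret.preds) {               // assume all preds are back-edges here
          if (!pred->insns.empty() && pred->insns.back().is_branch) {
            pred->insns.back().flags |= kIgnoreSuspendCheck;
          }
        }
      }
      std::cout << "loop branch flags: " << loop.insns.back().flags << "\n";  // prints 1
      return 0;
    }
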
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 8874faf..c794cc6 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -19,6 +19,7 @@
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/mir_field_info.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -236,15 +237,17 @@
       ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
       BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
       bb->AppendMIR(mir);
-      if (def->opcode >= Instruction::SGET && def->opcode <= Instruction::SPUT_SHORT) {
-        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_or_method_info;
-      } else if (def->opcode >= Instruction::IGET && def->opcode <= Instruction::IPUT_SHORT) {
+      if (IsInstructionIGetOrIPut(def->opcode)) {
         ASSERT_LT(def->field_or_method_info, cu_.mir_graph->ifield_lowering_infos_.size());
         mir->meta.ifield_lowering_info = def->field_or_method_info;
-      } else if (def->opcode >= Instruction::INVOKE_VIRTUAL &&
-          def->opcode < Instruction::INVOKE_INTERFACE_RANGE &&
-          def->opcode != Instruction::RETURN_VOID_BARRIER) {
+        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_or_method_info].MemAccessType(),
+                  IGetOrIPutMemAccessType(def->opcode));
+      } else if (IsInstructionSGetOrSPut(def->opcode)) {
+        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size());
+        mir->meta.sfield_lowering_info = def->field_or_method_info;
+        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_or_method_info].MemAccessType(),
+                  SGetOrSPutMemAccessType(def->opcode));
+      } else if (IsInstructionInvoke(def->opcode)) {
         ASSERT_LT(def->field_or_method_info, cu_.mir_graph->method_lowering_infos_.size());
         mir->meta.method_lowering_info = def->field_or_method_info;
       }
@@ -294,6 +297,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_class_idx;
     uint16_t declaring_field_idx;
+    DexMemAccessType type;
   };
 
   void DoPrepareSFields(const SFieldDef* defs, size_t count) {
@@ -301,12 +305,12 @@
     cu_.mir_graph->sfield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx);
+      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_class_idx_ = def->declaring_class_idx;
         field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ = MirSFieldLoweringInfo::kFlagIsStatic;
+        // We don't care about the volatile flag in these tests.
       }
       ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
       ASSERT_FALSE(field_info.IsClassInitialized());
@@ -343,6 +347,7 @@
     uintptr_t declaring_dex_file;
     uint16_t declaring_class_idx;
     uint16_t declaring_field_idx;
+    DexMemAccessType type;
   };
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
@@ -350,11 +355,12 @@
     cu_.mir_graph->ifield_lowering_infos_.reserve(count);
     for (size_t i = 0u; i != count; ++i) {
       const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx);
+      MirIFieldLoweringInfo field_info(def->field_idx, def->type);
       if (def->declaring_dex_file != 0u) {
         field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
         field_info.declaring_class_idx_ = def->declaring_class_idx;
         field_info.declaring_field_idx_ = def->declaring_field_idx;
+        // We don't care about the volatile flag in these tests.
       }
       ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
       cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
@@ -393,12 +399,12 @@
 
 TEST_F(ClassInitCheckEliminationTest, SingleBlock) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },  // Same declaring class as sfield[4].
-      { 4u, 1u, 3u, 4u },  // Same declaring class as sfield[3].
-      { 5u, 0u, 0u, 0u },  // Unresolved.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },  // Same declaring class as sfield[4].
+      { 4u, 1u, 3u, 4u, kDexMemAccessWord },  // Same declaring class as sfield[3].
+      { 5u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 5u),  // Unresolved.
@@ -432,9 +438,9 @@
 
 TEST_F(ClassInitCheckEliminationTest, SingleBlockWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -473,17 +479,17 @@
 
 TEST_F(ClassInitCheckEliminationTest, Diamond) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
-      { 4u, 1u, 4u, 4u },
-      { 5u, 1u, 5u, 5u },
-      { 6u, 1u, 6u, 6u },
-      { 7u, 1u, 7u, 7u },
-      { 8u, 1u, 8u, 8u },  // Same declaring class as sfield[9].
-      { 9u, 1u, 8u, 9u },  // Same declaring class as sfield[8].
-      { 10u, 0u, 0u, 0u },  // Unresolved.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
+      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
+      { 5u, 1u, 5u, 5u, kDexMemAccessWord },
+      { 6u, 1u, 6u, 6u, kDexMemAccessWord },
+      { 7u, 1u, 7u, 7u, kDexMemAccessWord },
+      { 8u, 1u, 8u, 8u, kDexMemAccessWord },   // Same declaring class as sfield[9].
+      { 9u, 1u, 8u, 9u, kDexMemAccessWord },   // Same declaring class as sfield[8].
+      { 10u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -539,11 +545,11 @@
 
 TEST_F(ClassInitCheckEliminationTest, DiamondWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
-      { 4u, 1u, 4u, 4u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
+      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -600,9 +606,9 @@
 
 TEST_F(ClassInitCheckEliminationTest, Loop) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),
@@ -631,7 +637,7 @@
 
 TEST_F(ClassInitCheckEliminationTest, LoopWithInvokes) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
   };
   static const MethodDef methods[] = {
       { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
@@ -671,10 +677,10 @@
 
 TEST_F(ClassInitCheckEliminationTest, Catch) {
   static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
-      { 2u, 1u, 2u, 2u },
-      { 3u, 1u, 3u, 3u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
+      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),  // Before the exception edge.
@@ -707,9 +713,9 @@
 
 TEST_F(NullCheckEliminationTest, SingleBlock) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 0u, 1u },
-      { 2u, 1u, 0u, 2u },  // Object.
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 0u, 2u, kDexMemAccessObject },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET_OBJECT, 0u, 100u, 2u),
@@ -768,9 +774,9 @@
 
 TEST_F(NullCheckEliminationTest, Diamond) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 0u, 1u },
-      { 2u, 1u, 0u, 2u },  // int[].
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
+      { 2u, 1u, 0u, 2u, kDexMemAccessObject },  // int[].
   };
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
@@ -816,8 +822,8 @@
 
 TEST_F(NullCheckEliminationTest, Loop) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),
@@ -846,8 +852,8 @@
 
 TEST_F(NullCheckEliminationTest, Catch) {
   static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u },
-      { 1u, 1u, 1u, 1u },
+      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
+      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
   };
   static const MIRDef mirs[] = {
       DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),  // Before the exception edge.
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index cab039b..e38dbf5 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -872,7 +872,7 @@
 
   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
     // Mark card for object assuming new value is stored.
-    MarkGCCard(rl_new_value.reg, rl_object.reg);
+    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
   }
 
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
@@ -1471,7 +1471,7 @@
     FreeTemp(reg_ptr);
   }
   if (card_mark) {
-    MarkGCCard(rl_src.reg, rl_array.reg);
+    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
   }
 }
 
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 0e00698..57e67d5 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -483,6 +483,7 @@
       } else {
         reconstructed_imm = base + 1;
       }
+      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
     }
 
     // Load the magic constant in two instructions.
@@ -758,7 +759,7 @@
 
   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
     // Mark card for object assuming new value is stored.
-    MarkGCCard(rl_new_value.reg, rl_object.reg);
+    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
   }
 
   RegStorage r_ptr = AllocTempRef();
@@ -1281,7 +1282,7 @@
     FreeTemp(reg_ptr);
   }
   if (card_mark) {
-    MarkGCCard(rl_src.reg, rl_array.reg);
+    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
   }
 }
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 80cb535..1cde01e 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -314,13 +314,17 @@
   }
 }
 
-void Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+void Mir2Lir::MarkGCCard(int opt_flags, RegStorage val_reg, RegStorage tgt_addr_reg) {
   DCHECK(val_reg.Valid());
   DCHECK_EQ(val_reg.Is64Bit(), cu_->target64);
-  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, nullptr);
-  UnconditionallyMarkGCCard(tgt_addr_reg);
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
+  if ((opt_flags & MIR_STORE_NON_NULL_VALUE) != 0) {
+    UnconditionallyMarkGCCard(tgt_addr_reg);
+  } else {
+    LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, nullptr);
+    UnconditionallyMarkGCCard(tgt_addr_reg);
+    LIR* target = NewLIR0(kPseudoTargetLabel);
+    branch_over->target = target;
+  }
 }
 
 /* Dump instructions and constant pool contents */
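
A hedged sketch of the MarkGCCard change (plain functions, not the Mir2Lir API; the flag's bit position is illustrative): when LVN proved the stored value non-null and set MIR_STORE_NON_NULL_VALUE, the null test and the branch around the card mark are simply not emitted.

    #include <cstdint>
    #include <iostream>

    constexpr int kMirStoreNonNullValue = 1 << 2;  // bit position is illustrative

    void MarkGCCardSketch(int opt_flags, intptr_t value, intptr_t target) {
      auto MarkCard = [target]() {
        std::cout << "card marked for 0x" << std::hex << target << std::dec << "\n";
      };
      if ((opt_flags & kMirStoreNonNullValue) != 0) {
        MarkCard();  // value statically known non-null: unconditional mark, no branch
      } else if (value != 0) {
        MarkCard();  // stands in for OpCmpImmBranch(kCondEq, val_reg, 0, ...) skipping the mark
      }
    }

    int main() {
      MarkGCCardSketch(0, 0x1000, 0x2000);                      // runtime null test kept
      MarkGCCardSketch(kMirStoreNonNullValue, 0x1000, 0x2000);  // test elided
      return 0;
    }
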
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index e12d305..3039852 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -112,18 +112,18 @@
 uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg, invoke->dalvikInsn.vA);
   DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
-  if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) {
-    return invoke->dalvikInsn.vC + arg;  // Non-range invoke.
+  if (IsInvokeInstructionRange(invoke->dalvikInsn.opcode)) {
+    return invoke->dalvikInsn.vC + arg;  // Range invoke.
   } else {
     DCHECK_EQ(Instruction::FormatOf(invoke->dalvikInsn.opcode), Instruction::k35c);
-    return invoke->dalvikInsn.arg[arg];  // Range invoke.
+    return invoke->dalvikInsn.arg[arg];  // Non-range invoke.
   }
 }
 
 bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg + 1, invoke->dalvikInsn.vA);
   DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
-  return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc ||
+  return IsInvokeInstructionRange(invoke->dalvikInsn.opcode) ||
       invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u;
 }
 
@@ -573,8 +573,7 @@
     // If the invoke has not been eliminated yet, check now whether we should do it.
     // This is done so that dataflow analysis does not get tripped up seeing nop invoke.
     if (static_cast<int>(invoke->dalvikInsn.opcode) != kMirOpNop) {
-      bool is_static = invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-          invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE;
+      bool is_static = IsInstructionInvokeStatic(invoke->dalvikInsn.opcode);
       if (is_static || (invoke->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
         // No null object register involved here so we can eliminate the invoke.
         invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
@@ -804,9 +803,7 @@
     return !data.is_volatile;
   }
 
-  DCHECK_EQ(data.method_is_static != 0u,
-            invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-            invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE);
+  DCHECK_EQ(data.method_is_static != 0u, IsInstructionInvokeStatic(invoke->dalvikInsn.opcode));
   bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
   if (!object_is_this) {
     // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE).
@@ -865,9 +862,7 @@
     return false;
   }
 
-  DCHECK_EQ(data.method_is_static != 0u,
-            invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
-            invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE);
+  DCHECK_EQ(data.method_is_static != 0u, IsInstructionInvokeStatic(invoke->dalvikInsn.opcode));
   bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
   if (!object_is_this) {
     // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE).
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c00f90b..0af0ca8 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -583,6 +583,7 @@
 
 void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+  DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
@@ -664,7 +665,7 @@
                     field_info.IsVolatile() ? kVolatile : kNotVolatile);
     }
     if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
-      MarkGCCard(rl_src.reg, r_base);
+      MarkGCCard(mir->optimization_flags, rl_src.reg, r_base);
     }
     FreeTemp(r_base);
   } else {
@@ -701,6 +702,7 @@
 
 void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+  DCHECK_EQ(SGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
 
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
@@ -839,6 +841,7 @@
 void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
                       RegLocation rl_dest, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
+  DCHECK_EQ(IGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -912,6 +915,7 @@
 void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
                       RegLocation rl_src, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
+  DCHECK_EQ(IPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -936,7 +940,7 @@
     }
     MarkPossibleNullPointerExceptionAfter(opt_flags, store);
     if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
-      MarkGCCard(rl_src.reg, rl_obj.reg);
+      MarkGCCard(opt_flags, rl_src.reg, rl_obj.reg);
     }
   } else {
     QuickEntrypointEnum target;
@@ -2212,43 +2216,53 @@
 }
 
 void Mir2Lir::GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  ArenaVector<SuccessorBlockInfo*>::const_iterator succ_bb_iter = bb->successor_blocks.cbegin();
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   const uint16_t entries = table[1];
   // Chained cmp-and-branch.
   const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
   int32_t starting_key = as_int32[0];
-  const int32_t* targets = &as_int32[1];
   rl_src = LoadValue(rl_src, kCoreReg);
   int i = 0;
-  for (; i < entries; i++) {
+  for (; i < entries; ++i, ++succ_bb_iter) {
     if (!InexpensiveConstantInt(starting_key + i, Instruction::Code::IF_EQ)) {
       // Switch to using a temp and add.
       break;
     }
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block->id]);
+    SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+    DCHECK(successor_block_info != nullptr);
+    int case_block_id = successor_block_info->block;
+    DCHECK_EQ(starting_key + i, successor_block_info->key);
+    OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
   }
   if (i < entries) {
     // The rest do not seem to be inexpensive. Try to allocate a temp and use add.
     RegStorage key_temp = AllocTypedTemp(false, kCoreReg, false);
     if (key_temp.Valid()) {
       LoadConstantNoClobber(key_temp, starting_key + i);
-      for (; i < entries - 1; i++) {
-        BasicBlock* case_block =
-            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+      for (; i < entries - 1; ++i, ++succ_bb_iter) {
+        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+        DCHECK(successor_block_info != nullptr);
+        int case_block_id = successor_block_info->block;
+        DCHECK_EQ(starting_key + i, successor_block_info->key);
+        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
         OpRegImm(kOpAdd, key_temp, 1);  // Increment key.
       }
-      BasicBlock* case_block =
-          mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+      SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+      DCHECK(successor_block_info != nullptr);
+      int case_block_id = successor_block_info->block;
+      DCHECK_EQ(starting_key + i, successor_block_info->key);
+      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
     } else {
       // No free temp, just finish the old loop.
-      for (; i < entries; i++) {
-        BasicBlock* case_block =
-            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-        OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block->id]);
+      for (; i < entries; ++i, ++succ_bb_iter) {
+        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+        DCHECK(successor_block_info != nullptr);
+        int case_block_id = successor_block_info->block;
+        DCHECK_EQ(starting_key + i, successor_block_info->key);
+        OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
       }
     }
   }
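A minimal model (illustrative, not ART code) of the control flow this chained cmp-and-branch lowering emits: one equality compare per case, with keys running from starting_key upward, falling through to the default successor when nothing matches.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in: returns the matched case index, or -1 for the default.
int PackedSwitchModel(int32_t value, int32_t starting_key, int entries) {
  for (int i = 0; i < entries; ++i) {      // one OpCmpImmBranch per case
    if (value == starting_key + i) {
      return i;                            // branch to block_label_list_[case_block_id]
    }
  }
  return -1;                               // fall through to the default successor
}

int main() {
  assert(PackedSwitchModel(11, 10, 3) == 1);
  assert(PackedSwitchModel(42, 10, 3) == -1);
  return 0;
}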
@@ -2257,7 +2271,7 @@
 void Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   if (cu_->verbose) {
-    DumpSparseSwitchTable(table);
+    DumpPackedSwitchTable(table);
   }
 
   const uint16_t entries = table[1];
@@ -2270,18 +2284,20 @@
 }
 
 void Mir2Lir::GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   const uint16_t entries = table[1];
   // Chained cmp-and-branch.
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
-  const int32_t* targets = &keys[entries];
   rl_src = LoadValue(rl_src, kCoreReg);
-  for (int i = 0; i < entries; i++) {
-    int key = keys[i];
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]);
+  int i = 0;
+  for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+    int case_block_id = successor_block_info->block;
+    int key = successor_block_info->key;
+    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block_id]);
+    i++;
   }
+  DCHECK_EQ(i, entries);
 }
 
 void Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
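GenSmallSparseSwitch above now takes each (key, target) pair straight from the block's successor list instead of re-decoding keys[] and targets[] from the dex table. A small sketch of that dispatch, with illustrative names:

#include <cassert>
#include <cstdint>
#include <vector>

struct SuccessorInfo { int32_t key; int block_id; };  // models SuccessorBlockInfo

int SparseSwitchModel(int32_t value, const std::vector<SuccessorInfo>& successors) {
  for (const SuccessorInfo& s : successors) {  // one OpCmpImmBranch per entry
    if (value == s.key) {
      return s.block_id;
    }
  }
  return -1;  // default successor
}

int main() {
  std::vector<SuccessorInfo> succ = {{-5, 3}, {100, 7}};
  assert(SparseSwitchModel(100, succ) == 7);
  assert(SparseSwitchModel(0, succ) == -1);
  return 0;
}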
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a7900ae..31b81bf 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1668,7 +1668,7 @@
     GenMemBarrier(kAnyAny);
   }
   if (is_object) {
-    MarkGCCard(rl_value.reg, rl_object.reg);
+    MarkGCCard(0, rl_value.reg, rl_object.reg);
   }
   return true;
 }
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index fb47238..0778c3b 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -641,7 +641,7 @@
     FreeTemp(reg_ptr);
   }
   if (card_mark) {
-    MarkGCCard(rl_src.reg, rl_array.reg);
+    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
   }
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 70ef991..0d30927 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -332,7 +332,7 @@
         kNotVolatile);
   }
   if (IsRef(size)) {
-    MarkGCCard(reg_src, reg_obj);
+    MarkGCCard(0, reg_src, reg_obj);
   }
   return true;
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 886b238..6847717 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -680,7 +680,7 @@
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
     virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
-    void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
+    virtual void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
 
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
@@ -1075,8 +1075,9 @@
      * @brief Mark a garbage collection card. Skip if the stored value is null.
      * @param val_reg the register holding the stored value to check against null.
      * @param tgt_addr_reg the address of the object or array where the value was stored.
+     * @param opt_flags the optimization flags which may indicate that the value is non-null.
      */
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+    void MarkGCCard(int opt_flags, RegStorage val_reg, RegStorage tgt_addr_reg);
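A plausible sketch of what the new opt_flags parameter enables (the flag name and bit below are hypothetical): when MIR analysis has proven the stored value non-null, the runtime null check guarding the card mark can be elided.

static void MarkCard() { /* stand-in for the card-table store */ }

constexpr int kFlagStoreNonNullValue = 1 << 2;  // hypothetical flag bit

void MarkGCCardSketch(int opt_flags, const void* stored_value) {
  if ((opt_flags & kFlagStoreNonNullValue) != 0) {
    MarkCard();                 // value statically known non-null: no branch needed
  } else if (stored_value != nullptr) {
    MarkCard();                 // otherwise keep the runtime null check
  }
}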
 
     /*
      * @brief Load the address of the dex method into the register.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index a808459..be10d93 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -30,23 +30,88 @@
  * pairs.
  */
 void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
+  GenSmallSparseSwitch(mir, table_offset, rl_src);
+}
+
+/*
+ * We override InsertCaseLabel because the first parameter here represents
+ * a basic block id instead of a dex offset.
+ */
+LIR* X86Mir2Lir::InsertCaseLabel(DexOffset bbid, int keyVal) {
+  LIR* boundary_lir = &block_label_list_[bbid];
+  LIR* res = boundary_lir;
   if (cu_->verbose) {
-    DumpSparseSwitchTable(table);
+    // Only pay the expense if we're pretty-printing.
+    LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
+    BasicBlock* bb = mir_graph_->GetBasicBlock(bbid);
+    DCHECK(bb != nullptr);
+    new_label->dalvik_offset = bb->start_offset;
+    new_label->opcode = kPseudoCaseLabel;
+    new_label->operands[0] = keyVal;
+    new_label->flags.fixup = kFixupLabel;
+    DCHECK(!new_label->flags.use_def_invalid);
+    new_label->u.m.def_mask = &kEncodeAll;
+    InsertLIRAfter(boundary_lir, new_label);
+    res = new_label;
   }
+  return res;
+}
+
+void X86Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) {
+  const uint16_t* table = tab_rec->table;
+  const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]);
   int entries = table[1];
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
-  const int32_t* targets = &keys[entries];
-  rl_src = LoadValue(rl_src, kCoreReg);
+  int low_key = s4FromSwitchData(&table[2]);
   for (int i = 0; i < entries; i++) {
-    int key = keys[i];
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]);
+    // The value at targets[i] is a basic block id, instead of a dex offset.
+    tab_rec->targets[i] = InsertCaseLabel(targets[i], i + low_key);
   }
 }
 
 /*
+ * We create a new packed switch table that stores basic block ids in
+ * targets[], derived from the block's successors. Note that the original
+ * packed switch table stores dex offsets in targets[].
+ */
+const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* table) {
+  /*
+   * The original packed switch data format:
+   *  ushort ident = 0x0100  magic value
+   *  ushort size            number of entries in the table
+   *  int first_key          first (and lowest) switch case value
+   *  int targets[size]      branch targets, relative to switch opcode
+   *
+   * Total size is (4+size*2) 16-bit code units.
+   *
+   * Note that the new packed switch data format is the same as the original
+   * format, except that targets[] are basic block ids.
+   */
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  // Get the number of entries.
+  int entries = table[1];
+  const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
+  int32_t starting_key = as_int32[0];
+  // Create a new table.
+  int size = sizeof(uint16_t) * (4 + entries * 2);
+  uint16_t* new_table = reinterpret_cast<uint16_t*>(arena_->Alloc(size, kArenaAllocMisc));
+  // Copy ident, size, and first_key to the new table.
+  memcpy(new_table, table, sizeof(uint16_t) * 4);
+  // Get the new targets.
+  int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]);
+  // Find out targets for each entry.
+  int i = 0;
+  for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+    DCHECK_EQ(starting_key + i, successor_block_info->key);
+    // Save target basic block id.
+    new_targets[i++] = successor_block_info->block;
+  }
+  DCHECK_EQ(i, entries);
+  return new_table;
+}
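A compact sketch of the conversion performed above, with illustrative values: the four leading 16-bit units (ident, size, and the 32-bit first_key) are copied verbatim, and each 32-bit slot of targets[] is rewritten from a dex offset to the successor's basic block id.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint16_t> ConvertSketch(const uint16_t* table,
                                    const std::vector<int32_t>& successor_block_ids) {
  int entries = table[1];
  std::vector<uint16_t> new_table(4 + entries * 2);
  std::memcpy(new_table.data(), table, sizeof(uint16_t) * 4);  // ident, size, first_key
  int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]);
  for (int i = 0; i < entries; ++i) {
    new_targets[i] = successor_block_ids[i];  // block id replaces dex offset
  }
  return new_table;
}

int main() {
  // ident, size = 2, first_key = 10, then two dex-offset targets (8 and 12).
  uint16_t old_table[] = {0x0100, 2, 10, 0, 8, 0, 12, 0};
  std::vector<uint16_t> converted = ConvertSketch(old_table, {5, 6});
  assert(reinterpret_cast<int32_t*>(&converted[4])[0] == 5);
  return 0;
}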
+
+/*
  * Code pattern will look something like:
  *
  * mov  r_val, ..
@@ -63,10 +128,8 @@
  * done:
  */
 void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  if (cu_->verbose) {
-    DumpPackedSwitchTable(table);
-  }
+  const uint16_t* old_table = mir_graph_->GetTable(mir, table_offset);
+  const uint16_t* table = ConvertPackedSwitchTable(mir, old_table);
   // Add the table to the list - we'll process it later
   SwitchTable* tab_rec =
       static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 26641f8..09e1482 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -264,8 +264,11 @@
                                      int first_bit, int second_bit) OVERRIDE;
   void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  const uint16_t* ConvertPackedSwitchTable(MIR* mir, const uint16_t* table);
   void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
   void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  LIR* InsertCaseLabel(DexOffset vaddr, int keyVal) OVERRIDE;
+  void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) OVERRIDE;
 
   /**
    * @brief Implement instanceof a final class with x86 specific code.
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 3f501b4..a063ce1 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1170,7 +1170,7 @@
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
       FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
-      MarkGCCard(rl_new_value.reg, rl_object.reg);
+      MarkGCCard(0, rl_new_value.reg, rl_object.reg);
       LockTemp(rs_r0);
     }
 
@@ -1898,6 +1898,16 @@
     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                             false /* is_load */, true /* is64bit */);
   }
+
+  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+
+  // If the left operand is in memory and the right operand is in a register,
+  // and both belong to the same Dalvik register, clobber the right one
+  // because it no longer holds valid data.
+  if (v_src_reg == v_dst_reg) {
+    Clobber(rl_src.reg);
+  }
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
@@ -2398,7 +2408,7 @@
     if (!constant_index) {
       FreeTemp(rl_index.reg);
     }
-    MarkGCCard(rl_src.reg, rl_array.reg);
+    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
   }
 }
 
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index f6c7d52..a541c7d 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -276,8 +276,7 @@
       }
       int num_uses = mir->dalvikInsn.vA;
       // If this is a non-static invoke, mark implicit "this"
-      if (((mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC) &&
-          (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) {
+      if (!IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
         reg_location_[uses[next]].defined = true;
         reg_location_[uses[next]].ref = true;
         type_mismatch |= reg_location_[uses[next]].wide;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 4f5026d..03899cc 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1125,7 +1125,6 @@
   copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_);
   copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
       orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_);
-  copy->SetNativeGcMapPtrSize<kVerifyNone>(orig->GetNativeGcMap(), target_ptr_size_);
 
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
@@ -1186,11 +1185,6 @@
         // Note this is not the code_ pointer, that is handled above.
         copy->SetEntryPointFromJniPtrSize<kVerifyNone>(GetOatAddress(jni_dlsym_lookup_offset_),
                                                        target_ptr_size_);
-      } else {
-        // Normal (non-abstract non-native) methods have various tables to relocate.
-        uint32_t native_gc_map_offset = orig->GetOatNativeGcMapOffset();
-        const uint8_t* native_gc_map = GetOatAddress(native_gc_map_offset);
-        copy->SetNativeGcMapPtrSize<kVerifyNone>(native_gc_map, target_ptr_size_);
       }
 
       // Interpreter entrypoint:
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index ce4ed6d..9fe98e3 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -190,8 +190,8 @@
   // If this test is failing and you have to update these constants,
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(84U, sizeof(OatHeader));
-  EXPECT_EQ(8U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(4U, sizeof(OatMethodOffsets));
+  EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c6beb36..a57f892 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -510,15 +510,18 @@
   }
 
   static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    return oat_class->method_offsets_[method_offsets_index].gc_map_offset_;
+    uint32_t offset = oat_class->method_headers_[method_offsets_index].gc_map_offset_;
+    return offset == 0u ? 0u :
+        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
   }
 
   static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
       ALWAYS_INLINE {
-    oat_class->method_offsets_[method_offsets_index].gc_map_offset_ = offset;
+    oat_class->method_headers_[method_offsets_index].gc_map_offset_ =
+        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
   }
 
-  static const char* Name() ALWAYS_INLINE {
+  static const char* Name() {
     return "GC map";
   }
 };
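The encoding above is invertible: the header field stores the distance from the (Thumb-bit-cleared) code start back to the GC map, and GetOffset recovers the original file offset. A quick numeric check with made-up offsets:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t code_offset = 0x1001u;          // low bit set for a Thumb method
  uint32_t gc_map_offset = 0x0f00u;        // file offset of the GC map
  uint32_t stored = (code_offset & ~1u) - gc_map_offset;  // SetOffset
  uint32_t recovered = (code_offset & ~1u) - stored;      // GetOffset
  assert(recovered == gc_map_offset);
  return 0;
}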
@@ -540,7 +543,7 @@
         (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
   }
 
-  static const char* Name() ALWAYS_INLINE {
+  static const char* Name() {
     return "mapping table";
   }
 };
@@ -562,7 +565,7 @@
         (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
   }
 
-  static const char* Name() ALWAYS_INLINE {
+  static const char* Name() {
     return "vmap table";
   }
 };
@@ -764,6 +767,7 @@
         OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
         uint32_t mapping_table_offset = method_header->mapping_table_offset_;
         uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+        uint32_t gc_map_offset = method_header->gc_map_offset_;
         // The code offset was 0 when the mapping/vmap table offset was set, so it's set
         // to 0-offset and we need to adjust it by code_offset.
         uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -775,12 +779,16 @@
           vmap_table_offset += code_offset;
           DCHECK_LT(vmap_table_offset, code_offset);
         }
+        if (gc_map_offset != 0u) {
+          gc_map_offset += code_offset;
+          DCHECK_LT(gc_map_offset, code_offset);
+        }
         uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
         uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
         uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
         *method_header = OatQuickMethodHeader(mapping_table_offset, vmap_table_offset,
-                                              frame_size_in_bytes, core_spill_mask, fp_spill_mask,
-                                              code_size);
+                                              gc_map_offset, frame_size_in_bytes, core_spill_mask,
+                                              fp_spill_mask, code_size);
 
         if (!deduped) {
           // Update offsets. (Checksum is updated when writing.)
@@ -909,7 +917,7 @@
     OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    OatMethodOffsets offsets(0u, 0u);
+    OatMethodOffsets offsets(0u);
     if (compiled_method != nullptr) {
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
       offsets = oat_class->method_offsets_[method_offsets_index_];
@@ -920,7 +928,7 @@
     InvokeType invoke_type = it.GetMethodInvokeType(dex_file_->GetClassDef(class_def_index_));
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    StackHandleScope<2> hs(soa.Self());
+    StackHandleScope<1> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file_)));
     mirror::ArtMethod* method = linker->ResolveMethod(*dex_file_, it.GetMemberIndex(), dex_cache,
                                                       NullHandle<mirror::ClassLoader>(),
@@ -936,7 +944,6 @@
     }
     // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking.
     method->SetQuickOatCodeOffset(offsets.code_offset_);
-    method->SetOatNativeGcMapOffset(offsets.gc_map_offset_);
 
     return true;
   }
@@ -1157,7 +1164,7 @@
 class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
  public:
   WriteMapMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
-                          size_t relative_offset)
+                        size_t relative_offset)
     : OatDexMethodVisitor(writer, relative_offset),
       out_(out),
       file_offset_(file_offset) {
@@ -1179,7 +1186,8 @@
       size_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]);
       DCHECK((map_size == 0u && map_offset == 0u) ||
             (map_size != 0u && map_offset != 0u && map_offset <= offset_))
-          << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+          << map_size << " " << map_offset << " " << offset_ << " "
+          << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
       if (map_size != 0u && map_offset == offset_) {
         if (UNLIKELY(!out->WriteFully(&(*map)[0], map_size))) {
           ReportWriteFailure(it);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index be8631a..b261460 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -305,6 +305,15 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
+                                    Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -313,6 +322,14 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction,
                               Primitive::Type type,
                               uint32_t dex_pc) {
@@ -1141,6 +1158,36 @@
       break;
     }
 
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::OR_INT: {
       Binop_23x<HOr>(instruction, Primitive::kPrimInt);
       break;
@@ -1240,6 +1287,36 @@
       break;
     }
 
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::DIV_FLOAT_2ADDR: {
       Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
@@ -1354,6 +1431,21 @@
       break;
     }
 
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false);
+      break;
+    }
+
     case Instruction::NEW_INSTANCE: {
       current_block_->AddInstruction(
           new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c()));
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 897bcec..204005d 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -105,12 +105,18 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1701ef5..a204e21 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2087,6 +2087,124 @@
   }
 }
 
+void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary::CallKind call_kind = op->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(op, call_kind);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      // The runtime helper puts the output in R0,R2.
+      locations->SetOut(Location::RegisterPairLocation(R0, R2));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = op->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register out_reg = out.As<Register>();
+      Register first_reg = first.As<Register>();
+      // ARM doesn't mask the shift count, so we need to do it ourselves.
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
+        if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, second_reg);
+        } else {
+          __ Lsr(out_reg, first_reg, second_reg);
+        }
+      } else {
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
+        if (shift_value == 0) {  // ARM does not support a shift-by-zero immediate.
+          __ Mov(out_reg, first_reg);
+        } else if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, shift_value);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, shift_value);
+        } else {
+          __ Lsr(out_reg, first_reg, shift_value);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      // TODO: Inline the assembly instead of calling the runtime.
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.As<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+
+      int32_t entry_point_offset;
+      if (op->IsShl()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShlLong);
+      } else if (op->IsShr()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShrLong);
+      } else {
+        entry_point_offset = QUICK_ENTRY_POINT(pUshrLong);
+      }
+      __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+      __ blx(LR);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+  }
+}
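The masking above mirrors dex shift semantics, which use only the low five bits of the count for int shifts (six for long), so e.g. 1 << 33 must evaluate to 2. A portable check, assuming kMaxIntShiftValue == 0x1f:

#include <cassert>
#include <cstdint>

int32_t DexShlInt(int32_t x, int32_t count) {
  return x << (count & 0x1f);  // mask as the generated and_ does
}

int main() {
  assert(DexShlInt(1, 33) == 2);  // 33 & 31 == 1
  assert(DexShlInt(1, 32) == 1);  // 32 & 31 == 0: identity, hence the Mov path
  return 0;
}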
+
+void LocationsBuilderARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c00fac1..226e635 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -109,6 +109,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   ArmAssembler* const assembler_;
   CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 82dced5..7a8b941 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -801,7 +801,10 @@
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   M(ParallelMove)                                          \
-  M(Rem)
+  M(Rem)                                                   \
+  M(Shl)                                                   \
+  M(Shr)                                                   \
+  M(UShr)                                                  \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c53cea..917b7dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2129,6 +2129,139 @@
   }
 }
 
+void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  DCHECK(first.Equals(locations->Out()));
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register first_reg = first.As<Register>();
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        DCHECK_EQ(ECX, second_reg);
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register second_reg = second.As<Register>();
+      DCHECK_EQ(ECX, second_reg);
+      if (op->IsShl()) {
+        GenerateShlLong(first, second_reg);
+      } else if (op->IsShr()) {
+        GenerateShrLong(first, second_reg);
+      } else {
+        GenerateUShrLong(first, second_reg);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
+  __ shll(loc.AsRegisterPairLow<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
+  __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
+  __ Bind(&done);
+}
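A portable model of the three long-shift sequences, shown here for GenerateShlLong: shld/shll shift the register pair by count mod 32 (the hardware masks the CL count), then the test-against-32 fixup moves low into high and zeroes low when bit 5 of the count is set. Illustrative code, assuming the count is already masked to 0..63:

#include <cassert>
#include <cstdint>

uint64_t ShlLongModel(uint32_t lo, uint32_t hi, uint32_t count) {
  uint32_t c = count & 31;                                         // CL is masked mod 32
  uint32_t new_hi = (c == 0) ? hi : (hi << c) | (lo >> (32 - c));  // shld
  uint32_t new_lo = lo << c;                                       // shll
  if ((count & 32) != 0) {  // testl(shifter, Immediate(32)) nonzero: fixup
    new_hi = new_lo;        // movl high, low
    new_lo = 0;             // movl low, 0
  }
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}

int main() {
  assert(ShlLongModel(1u, 0u, 1) == 2u);
  assert(ShlLongModel(1u, 0u, 33) == (1ull << 33));
  assert(ShlLongModel(0x80000000u, 0u, 1) == (1ull << 32));
  return 0;
}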
+
+void LocationsBuilderX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0aff6cc..aed06c0 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -104,6 +104,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleShift(HBinaryOperation* instruction);
 
   CodeGeneratorX86* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -132,6 +133,10 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* instruction);
+  void GenerateShlLong(const Location& loc, Register shifter);
+  void GenerateShrLong(const Location& loc, Register shifter);
+  void GenerateUShrLong(const Location& loc, Register shifter);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 97f5e5c..83d04b1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2026,6 +2026,107 @@
   }
 }
 
+void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).As<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shlq(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, second_reg);
+        } else {
+          __ shrq(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shlq(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, imm);
+        } else {
+          __ shrq(first_reg, imm);
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
@@ -2795,7 +2896,7 @@
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.As<XmmRegister>());
     } else {
-      DCHECK(destination.IsDoubleStackSlot());
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.As<XmmRegister>());
     }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 29c679d..794b81f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -108,6 +108,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorX86_64* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* operation);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 1953241..5d712fe 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -342,4 +342,72 @@
   }
 }
 
+static Primitive::Type PrimitiveKind(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+      return Primitive::kPrimInt;
+    default:
+      return type;
+  }
+}
+
+void SSAChecker::VisitCondition(HCondition* op) {
+  VisitInstruction(op);
+  // TODO: check input types, and special case the `null` check.
+  if (op->GetType() != Primitive::kPrimBoolean) {
+    std::stringstream error;
+    error << "Condition " << op->DebugName() << " " << op->GetId()
+          << " has a non-boolean result type: "
+          << op->GetType() << ".";
+    errors_.push_back(error.str());
+  }
+}
+
+void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
+  VisitInstruction(op);
+  if (op->IsUShr() || op->IsShr() || op->IsShl()) {
+    if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
+      std::stringstream error;
+      error << "Shift operation " << op->DebugName() << " " << op->GetId()
+            << " has a non-int kind second input: "
+            << op->InputAt(1)->DebugName() << " of type " << op->InputAt(1)->GetType()
+            << ".";
+      errors_.push_back(error.str());
+    }
+  } else {
+    if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) {
+      std::stringstream error;
+      error << "Binary operation " << op->DebugName() << " " << op->GetId()
+            << " has inputs of different type: "
+            << op->InputAt(0)->GetType() << ", and " << op->InputAt(1)->GetType()
+            << ".";
+      errors_.push_back(error.str());
+    }
+  }
+
+  if (op->IsCompare()) {
+    if (op->GetType() != Primitive::kPrimInt) {
+      std::stringstream error;
+      error << "Compare operation " << op->GetId()
+            << " has a non-int result type: "
+            << op->GetType() << ".";
+      errors_.push_back(error.str());
+    }
+  } else {
+    // Use the first input, so that this check also covers shift operations.
+    if (PrimitiveKind(op->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) {
+      std::stringstream error;
+      error << "Binary operation " << op->DebugName() << " " << op->GetId()
+            << " has a result type different from its input type: "
+            << op->GetType() << ", and " << op->InputAt(0)->GetType()
+            << ".";
+      errors_.push_back(error.str());
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 8ba8cb1..b6c9f17 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -24,11 +24,11 @@
 namespace art {
 
 // A control-flow graph visitor performing various checks.
-class GraphChecker : public HGraphVisitor {
+class GraphChecker : public HGraphDelegateVisitor {
  public:
   GraphChecker(ArenaAllocator* allocator, HGraph* graph,
                const char* dump_prefix = "art::GraphChecker: ")
-    : HGraphVisitor(graph),
+    : HGraphDelegateVisitor(graph),
       allocator_(allocator),
       dump_prefix_(dump_prefix) {}
 
@@ -36,10 +36,10 @@
   virtual void Run() { VisitInsertionOrder(); }
 
   // Check `block`.
-  virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
 
   // Check `instruction`.
-  virtual void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitInstruction(HInstruction* instruction) OVERRIDE;
 
   // Was the last visit of the graph valid?
   bool IsValid() const {
@@ -82,7 +82,7 @@
     : GraphChecker(allocator, graph, "art::SSAChecker: ") {}
 
   // Check the whole graph (in reverse post-order).
-  virtual void Run() {
+  void Run() OVERRIDE {
     // VisitReversePostOrder is used instead of VisitInsertionOrder,
     // as the latter might visit dead blocks removed by the dominator
     // computation.
@@ -90,13 +90,15 @@
   }
 
   // Perform SSA form checks on `block`.
-  virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
   // Loop-related checks from block `loop_header`.
   void CheckLoop(HBasicBlock* loop_header);
 
   // Perform SSA form checks on instructions.
-  virtual void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  virtual void VisitPhi(HPhi* phi) OVERRIDE;
+  void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitPhi(HPhi* phi) OVERRIDE;
+  void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
+  void VisitCondition(HCondition* op) OVERRIDE;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(SSAChecker);
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 4d8bec2..60d996b 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -30,7 +30,6 @@
 // TODO: Create an analysis/optimization abstraction.
 static const char* kLivenessPassName = "liveness";
 static const char* kRegisterAllocatorPassName = "register";
-static const char* kGVNPassName = "gvn";
 
 /**
  * If enabled, emits compilation information suitable for the c1visualizer tool
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index a841d5f..8e739cb 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -166,10 +166,10 @@
 /**
 * Optimization phase that removes redundant instructions.
  */
-class GlobalValueNumberer : public HOptimization {
+class GlobalValueNumberer : public ValueObject {
  public:
   GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph)
-      : HOptimization(graph, true, "GVN"),
+      : graph_(graph),
         allocator_(allocator),
         block_effects_(allocator, graph->GetBlocks().Size()),
         loop_effects_(allocator, graph->GetBlocks().Size()),
@@ -187,7 +187,7 @@
     }
   }
 
-  void Run() OVERRIDE;
+  void Run();
 
  private:
   // Per-block GVN. Will also update the ValueSet of the dominated and
@@ -202,6 +202,8 @@
   SideEffects GetLoopEffects(HBasicBlock* block) const;
   SideEffects GetBlockEffects(HBasicBlock* block) const;
 
+  HGraph* graph_;
+
   ArenaAllocator* const allocator_;
 
   // Side effects of individual blocks, that is the union of the side effects
@@ -224,6 +226,19 @@
   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
 };
 
+class GVNOptimization : public HOptimization {
+ public:
+  explicit GVNOptimization(HGraph* graph) : HOptimization(graph, true, "GVN") {}
+
+  void Run() OVERRIDE {
+    GlobalValueNumberer gvn(graph_->GetArena(), graph_);
+    gvn.Run();
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(GVNOptimization);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_GVN_H_
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 6dd4207..c49cf7e 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -50,10 +50,9 @@
   SsaLivenessAnalysis liveness(*graph, &codegen);
   liveness.Analyze();
 
-  ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks);
+  ASSERT_EQ(liveness.GetLinearOrder().Size(), number_of_blocks);
   for (size_t i = 0; i < number_of_blocks; ++i) {
-    ASSERT_EQ(liveness.GetLinearPostOrder().Get(number_of_blocks - i - 1)->GetBlockId(),
-              expected_order[i]);
+    ASSERT_EQ(liveness.GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]);
   }
 }
 
@@ -194,4 +193,58 @@
   TestCode(data, blocks, 12);
 }
 
+TEST(LinearizeTest, CFG6) {
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++++++++++
+  //              |                 +
+  //            Block3              +
+  //            /     \             +
+  //       Block8     Block4        +
+  //         |         /   \        +
+  //       Block5 <- Block9 Block6  +
+  //         |
+  //       Block7
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 0x0004,
+    Instruction::IF_EQ, 0x0003,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0xFA00);
+
+  const int blocks[] = {0, 1, 2, 3, 4, 6, 9, 8, 5, 7};
+  TestCode(data, blocks, arraysize(blocks));
+}
+
+TEST(LinearizeTest, CFG7) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++++
+  //              |           +
+  //            Block3        +
+  //            /    \        +
+  //        Block4  Block8    +
+  //        /  \        |     +
+  //   Block5 Block9 - Block6 +
+  //     |
+  //   Block7
+  //
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 0x0005,
+    Instruction::IF_EQ, 0x0003,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0xFA00);
+
+  const int blocks[] = {0, 1, 2, 3, 4, 9, 8, 6, 5, 7};
+  TestCode(data, blocks, arraysize(blocks));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 89c9495..e3c6fec 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -386,7 +386,7 @@
     Instruction::ADD_INT, 1 << 8,
     Instruction::GOTO | 0x300,
     Instruction::ADD_INT, 1 << 8,
-    Instruction::RETURN | 1 << 8);
+    Instruction::RETURN);
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -410,7 +410,10 @@
   interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(4u, range->GetStart());
-  ASSERT_EQ(28u, range->GetEnd());
+  ASSERT_EQ(17u, range->GetEnd());
+  range = range->GetNext();
+  ASSERT_EQ(20u, range->GetStart());
+  ASSERT_EQ(23u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the first add.
@@ -429,9 +432,8 @@
   ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
-  // Test for the phi, which is unused.
   HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi();
-  ASSERT_EQ(phi->NumberOfUses(), 0u);
+  ASSERT_EQ(phi->NumberOfUses(), 1u);
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(26u, range->GetStart());
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7d52d7d..f562113 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -42,6 +42,9 @@
 static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
+static constexpr uint32_t kMaxIntShiftValue = 0x1f;
+static constexpr uint64_t kMaxLongShiftValue = 0x3f;
+
 enum IfCondition {
   kCondEQ,
   kCondNE,
@@ -233,7 +236,7 @@
     return false;
   }
 
-  int NumberOfBackEdges() const {
+  size_t NumberOfBackEdges() const {
     return back_edges_.Size();
   }
 
@@ -521,9 +524,11 @@
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
-  M(Rem, BinaryOperation)                                             \
+  M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Shl, BinaryOperation)                                               \
+  M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
   M(StaticFieldSet, Instruction)                                        \
   M(StoreLocal, Instruction)                                            \
@@ -532,6 +537,7 @@
   M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
   M(TypeConversion, Instruction)                                        \
+  M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
@@ -1831,6 +1837,57 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
+class HShl : public HBinaryOperation {
+ public:
+  HShl(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x << (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x << (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shl);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShl);
+};
+
+class HShr : public HBinaryOperation {
+ public:
+  HShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x >> (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x >> (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShr);
+};
+
+class HUShr : public HBinaryOperation {
+ public:
+  HUShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    uint32_t ux = static_cast<uint32_t>(x);
+    uint32_t uy = static_cast<uint32_t>(y) & kMaxIntShiftValue;
+    return static_cast<int32_t>(ux >> uy);
+  }
+
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    uint64_t ux = static_cast<uint64_t>(x);
+    uint64_t uy = static_cast<uint64_t>(y) & kMaxLongShiftValue;
+    return static_cast<int64_t>(ux >> uy);
+  }
+
+  DECLARE_INSTRUCTION(UShr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HUShr);
+};
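A quick check of HUShr::Evaluate's constant-folding semantics: the shift happens on the unsigned representation, so -1 >>> 28 folds to 0xf rather than staying -1.

#include <cassert>
#include <cstdint>

int32_t UShrEvalSketch(int32_t x, int32_t y) {
  uint32_t ux = static_cast<uint32_t>(x);
  uint32_t uy = static_cast<uint32_t>(y) & 0x1f;  // kMaxIntShiftValue
  return static_cast<int32_t>(ux >> uy);
}

int main() {
  assert(UShrEvalSketch(-1, 28) == 0xf);
  assert(UShrEvalSketch(-1, 0) == -1);
  return 0;
}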
+
 class HAnd : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index d1178d5..b99f678 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -27,13 +27,15 @@
       SSAChecker checker(graph_->GetArena(), graph_);
       checker.Run();
       if (!checker.IsValid()) {
-        LOG(FATAL) << Dumpable<SSAChecker>(checker);
+        LOG(FATAL) << "Error after " << GetPassName() << ": "
+                   << Dumpable<SSAChecker>(checker);
       }
     } else {
       GraphChecker checker(graph_->GetArena(), graph_);
       checker.Run();
       if (!checker.IsValid()) {
-        LOG(FATAL) << Dumpable<GraphChecker>(checker);
+        LOG(FATAL) << "Error after " << GetPassName() << ": "
+                   << Dumpable<GraphChecker>(checker);
       }
     }
   }
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index d281248..e36ef19 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -17,7 +17,6 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 
-#include "graph_visualizer.h"
 #include "nodes.h"
 
 namespace art {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 42ac77d..d8533eb 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -35,6 +35,7 @@
 #include "nodes.h"
 #include "prepare_for_register_allocation.h"
 #include "register_allocator.h"
+#include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/arena_allocator.h"
@@ -191,21 +192,31 @@
 }
 
 static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) {
+  TransformToSsa ssa(graph);
   HDeadCodeElimination opt1(graph);
   HConstantFolding opt2(graph);
   SsaRedundantPhiElimination opt3(graph);
   SsaDeadPhiElimination opt4(graph);
   InstructionSimplifier opt5(graph);
-  GlobalValueNumberer opt6(graph->GetArena(), graph);
+  GVNOptimization opt6(graph);
   InstructionSimplifier opt7(graph);
 
-  HOptimization* optimizations[] = { &opt1, &opt2, &opt3, &opt4, &opt5, &opt6, &opt7 };
+  HOptimization* optimizations[] = {
+    &ssa,
+    &opt1,
+    &opt2,
+    &opt3,
+    &opt4,
+    &opt5,
+    &opt6,
+    &opt7
+  };
 
   for (size_t i = 0; i < arraysize(optimizations); ++i) {
     HOptimization* optimization = optimizations[i];
     optimization->Run();
-    optimization->Check();
     visualizer.DumpGraph(optimization->GetPassName());
+    optimization->Check();
   }
 }
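Note the reordering at the end of the loop: the graph is now dumped before Check() runs, so when the checker hits LOG(FATAL) the visualizer has already written the offending graph. A toy model of the driver loop (hypothetical names, not ART's API):

#include <cstddef>
#include <cstdio>

struct Pass {
  virtual ~Pass() {}
  virtual const char* GetPassName() const = 0;
  virtual void Run() = 0;
  virtual void Check() = 0;  // Would abort on an invalid graph.
};

struct NopPass : Pass {
  const char* GetPassName() const override { return "nop"; }
  void Run() override {}
  void Check() override {}
};

void RunAll(Pass** passes, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    passes[i]->Run();
    std::printf("dump graph after %s\n", passes[i]->GetPassName());  // Dump first...
    passes[i]->Check();  // ...then validate, so a failing graph is already on disk.
  }
}

int main() {
  NopPass nop;
  Pass* passes[] = { &nop };
  RunAll(passes, 1);
  return 0;
}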
 
@@ -271,11 +282,6 @@
       && CanOptimize(*code_item)
       && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
     optimized_compiled_methods_++;
-    graph->BuildDominatorTree();
-    graph->TransformToSSA();
-    visualizer.DumpGraph("ssa");
-    graph->FindNaturalLoops();
-
     RunOptimizations(graph, visualizer);
 
     PrepareForRegisterAllocation(graph).Run();
@@ -321,7 +327,7 @@
       graph->FindNaturalLoops();
       SsaRedundantPhiElimination(graph).Run();
       SsaDeadPhiElimination(graph).Run();
-      GlobalValueNumberer(graph->GetArena(), graph).Run();
+      GVNOptimization(graph).Run();
       SsaLivenessAnalysis liveness(*graph, codegen);
       liveness.Analyze();
       visualizer.DumpGraph(kLivenessPassName);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index b2cc119..edfafcd 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -18,6 +18,7 @@
 
 #include "nodes.h"
 #include "ssa_type_propagation.h"
+#include "ssa_phi_elimination.h"
 
 namespace art {
 
@@ -41,11 +42,20 @@
     }
   }
 
-  // 3) Propagate types of phis.
+  // 3) Remove dead phis. This will remove phis that are only used by environments:
+  // at the DEX level, the type of these phis does not need to be consistent, but
+  // our code generator will complain if the inputs of a phi do not have the same
+  // type (modulo the special case of `null`).
+  SsaDeadPhiElimination dead_phis(GetGraph());
+  dead_phis.Run();
+
+  // 4) Propagate types of phis. At this point, phis are typed void in the general
+  // case, or float or double when we created a floating-point equivalent. So we
+  // need to propagate the types across phis to give them a correct type.
   SsaTypePropagation type_propagation(GetGraph());
   type_propagation.Run();
 
-  // 4) Clear locals.
+  // 5) Clear locals.
   // TODO: Move this to a dead code eliminator phase.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
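Running dead phi elimination before type propagation (steps 3 and 4 above) is a correctness matter, not a size optimization: a phi whose only consumers are environments may legally have inputs of mismatched DEX-level types, and letting it survive into type propagation could force an inconsistent type onto otherwise well-typed phis. Removing such phis first means step 4 only ever sees phis the code generators must also accept.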
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2207cd6..5ab328f 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -18,9 +18,24 @@
 #define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_
 
 #include "nodes.h"
+#include "optimization.h"
 
 namespace art {
 
+class TransformToSsa : public HOptimization {
+ public:
+  explicit TransformToSsa(HGraph* graph) : HOptimization(graph, true, "ssa transform") {}
+
+  void Run() OVERRIDE {
+    graph_->BuildDominatorTree();
+    graph_->TransformToSSA();
+    graph_->FindNaturalLoops();
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TransformToSsa);
+};
+
 static constexpr int kDefaultNumberOfLoops = 2;
 
 class SsaBuilder : public HGraphVisitor {
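Packaging the three graph phases behind TransformToSsa is what lets the optimizing_compiler.cc hunk above delete the hand-rolled BuildDominatorTree/TransformToSSA/FindNaturalLoops sequence: the SSA transform now flows through the same Run()/Check()/DumpGraph() driver as every other HOptimization, so the graph checker also validates the graph immediately after SSA construction.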
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 0085b27..660a5c5 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -28,11 +28,6 @@
   ComputeLiveness();
 }
 
-static bool IsLoopExit(HLoopInformation* current, HLoopInformation* to) {
-  // `to` is either not part of a loop, or `current` is an inner loop of `to`.
-  return to == nullptr || (current != to && current->IsIn(*to));
-}
-
 static bool IsLoop(HLoopInformation* info) {
   return info != nullptr;
 }
@@ -48,46 +43,64 @@
       && inner->IsIn(*outer);
 }
 
-static void VisitBlockForLinearization(HBasicBlock* block,
-                                       GrowableArray<HBasicBlock*>* order,
-                                       ArenaBitVector* visited) {
-  if (visited->IsBitSet(block->GetBlockId())) {
-    return;
-  }
-  visited->SetBit(block->GetBlockId());
-  size_t number_of_successors = block->GetSuccessors().Size();
-  if (number_of_successors == 0) {
-    // Nothing to do.
-  } else if (number_of_successors == 1) {
-    VisitBlockForLinearization(block->GetSuccessors().Get(0), order, visited);
-  } else {
-    DCHECK_EQ(number_of_successors, 2u);
-    HBasicBlock* first_successor = block->GetSuccessors().Get(0);
-    HBasicBlock* second_successor = block->GetSuccessors().Get(1);
-    HLoopInformation* my_loop = block->GetLoopInformation();
-    HLoopInformation* first_loop = first_successor->GetLoopInformation();
-    HLoopInformation* second_loop = second_successor->GetLoopInformation();
-
-    if (!IsLoop(my_loop)) {
-      // Nothing to do. Current order is fine.
-    } else if (IsLoopExit(my_loop, second_loop) && InSameLoop(my_loop, first_loop)) {
-      // Visit the loop exit first in post order.
-      std::swap(first_successor, second_successor);
-    } else if (IsInnerLoop(my_loop, first_loop) && !IsInnerLoop(my_loop, second_loop)) {
-      // Visit the inner loop last in post order.
-      std::swap(first_successor, second_successor);
+static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBasicBlock* block) {
+  size_t insert_at = worklist->Size();
+  HLoopInformation* block_loop = block->GetLoopInformation();
+  for (; insert_at > 0; --insert_at) {
+    HBasicBlock* current = worklist->Get(insert_at - 1);
+    HLoopInformation* current_loop = current->GetLoopInformation();
+    if (InSameLoop(block_loop, current_loop)
+        || !IsLoop(current_loop)
+        || IsInnerLoop(current_loop, block_loop)) {
+      // The block can be processed immediately.
+      break;
     }
-    VisitBlockForLinearization(first_successor, order, visited);
-    VisitBlockForLinearization(second_successor, order, visited);
   }
-  order->Add(block);
+  worklist->InsertAt(insert_at, block);
 }
 
 void SsaLivenessAnalysis::LinearizeGraph() {
-  // For simplicity of the implementation, we create post linear order. The order for
-  // computing live ranges is the reverse of that order.
-  ArenaBitVector visited(graph_.GetArena(), graph_.GetBlocks().Size(), false);
-  VisitBlockForLinearization(graph_.GetEntryBlock(), &linear_post_order_, &visited);
+  // Create a reverse post ordering with the following properties:
+  // - Blocks in a loop are consecutive,
+  // - The back edge block is the last block of its loop, just before the loop exits.
+
+  // (1): Record the number of forward predecessors for each block. This is to
+  //      ensure the resulting order is reverse post order. We could use the
+  //      current reverse post order in the graph, but it would require making
+  //      order queries to a GrowableArray, which is not the best data structure
+  //      for it.
+  GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size());
+  forward_predecessors.SetSize(graph_.GetBlocks().Size());
+  for (size_t i = 0, e = graph_.GetBlocks().Size(); i < e; ++i) {
+    HBasicBlock* block = graph_.GetBlocks().Get(i);
+    size_t number_of_forward_predecessors = block->GetPredecessors().Size();
+    if (block->IsLoopHeader()) {
+      // We rely on having simplified the CFG.
+      DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges());
+      number_of_forward_predecessors--;
+    }
+    forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
+  }
+
+  // (2): Following a worklist approach, start with the entry block and
+  //      iterate over its successors. Once all non-back-edge predecessors of a
+  //      successor block have been visited, that block is added to the worklist
+  //      at a position that satisfies the ordering requirements of the linear graph.
+  GrowableArray<HBasicBlock*> worklist(graph_.GetArena(), 1);
+  worklist.Add(graph_.GetEntryBlock());
+  do {
+    HBasicBlock* current = worklist.Pop();
+    linear_order_.Add(current);
+    for (size_t i = 0, e = current->GetSuccessors().Size(); i < e; ++i) {
+      HBasicBlock* successor = current->GetSuccessors().Get(i);
+      int block_id = successor->GetBlockId();
+      size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id);
+      if (number_of_remaining_predecessors == 1) {
+        AddToListForLinearization(&worklist, successor);
+      }
+      forward_predecessors.Put(block_id, number_of_remaining_predecessors - 1);
+    }
+  } while (!worklist.IsEmpty());
 }
 
 void SsaLivenessAnalysis::NumberInstructions() {
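For intuition, here is the predecessor-counting scheme of steps (1) and (2) on a loop-free diamond graph; the real implementation additionally uses AddToListForLinearization() to keep the blocks of a loop consecutive. A simplified, runnable sketch using standard containers instead of GrowableArray:

#include <cstdio>
#include <vector>

int main() {
  // successors[b] lists the successor ids of block b (entry is block 0).
  const std::vector<std::vector<int>> successors = {
      {1, 2},  // 0: entry, branches to 1 and 2.
      {3},     // 1
      {3},     // 2
      {},      // 3: exit.
  };
  // Step (1): count forward predecessors.
  std::vector<int> preds(successors.size(), 0);
  for (const auto& succs : successors) {
    for (int s : succs) ++preds[s];
  }
  // Step (2): emit a block once all of its predecessors have been emitted.
  std::vector<int> worklist = {0};  // The entry has no forward predecessor.
  while (!worklist.empty()) {
    const int current = worklist.back();
    worklist.pop_back();
    std::printf("linear order: %d\n", current);
    for (int s : successors[current]) {
      if (--preds[s] == 0) worklist.push_back(s);
    }
  }
  return 0;
}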
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index ca08d5b..2312389 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -582,7 +582,7 @@
   SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen)
       : graph_(graph),
         codegen_(codegen),
-        linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
+        linear_order_(graph.GetArena(), graph.GetBlocks().Size()),
         block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
         instructions_from_ssa_index_(graph.GetArena(), 0),
         instructions_from_lifetime_position_(graph.GetArena(), 0),
@@ -604,8 +604,8 @@
     return &block_infos_.Get(block.GetBlockId())->kill_;
   }
 
-  const GrowableArray<HBasicBlock*>& GetLinearPostOrder() const {
-    return linear_post_order_;
+  const GrowableArray<HBasicBlock*>& GetLinearOrder() const {
+    return linear_order_;
   }
 
   HInstruction* GetInstructionFromSsaIndex(size_t index) const {
@@ -661,7 +661,7 @@
 
   const HGraph& graph_;
   CodeGenerator* const codegen_;
-  GrowableArray<HBasicBlock*> linear_post_order_;
+  GrowableArray<HBasicBlock*> linear_order_;
   GrowableArray<BlockInfo*> block_infos_;
 
   // Temporary array used when computing live_in, live_out, and kill sets.
@@ -674,38 +674,43 @@
   DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
 };
 
-class HLinearOrderIterator : public ValueObject {
- public:
-  explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness)
-      : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {}
-
-  bool Done() const { return index_ == 0; }
-  HBasicBlock* Current() const { return post_order_.Get(index_ -1); }
-  void Advance() { --index_; DCHECK_GE(index_, 0U); }
-
- private:
-  const GrowableArray<HBasicBlock*>& post_order_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
-};
-
 class HLinearPostOrderIterator : public ValueObject {
  public:
   explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness)
-      : post_order_(liveness.GetLinearPostOrder()), index_(0) {}
+      : order_(liveness.GetLinearOrder()), index_(liveness.GetLinearOrder().Size()) {}
 
-  bool Done() const { return index_ == post_order_.Size(); }
-  HBasicBlock* Current() const { return post_order_.Get(index_); }
-  void Advance() { ++index_; }
+  bool Done() const { return index_ == 0; }
+
+  HBasicBlock* Current() const { return order_.Get(index_ - 1); }
+
+  void Advance() {
+    --index_;
+    DCHECK_GE(index_, 0U);
+  }
 
  private:
-  const GrowableArray<HBasicBlock*>& post_order_;
+  const GrowableArray<HBasicBlock*>& order_;
   size_t index_;
 
   DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
 };
 
+class HLinearOrderIterator : public ValueObject {
+ public:
+  explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness)
+      : order_(liveness.GetLinearOrder()), index_(0) {}
+
+  bool Done() const { return index_ == order_.Size(); }
+  HBasicBlock* Current() const { return order_.Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const GrowableArray<HBasicBlock*>& order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
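After the rename there is a single linear_order_ array, and the two iterators simply walk it in opposite directions: HLinearOrderIterator yields the order itself while HLinearPostOrderIterator yields its reverse. The index arithmetic, in miniature:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> order = {10, 11, 12};  // Stand-in for linear_order_.
  // HLinearOrderIterator: front to back.
  for (std::size_t i = 0; i < order.size(); ++i)
    std::printf("order: %d\n", order[i]);
  // HLinearPostOrderIterator: back to front, reading order[index - 1].
  for (std::size_t i = order.size(); i > 0; --i)
    std::printf("post: %d\n", order[i - 1]);
  return 0;
}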
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 56979e1..58cea77 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -24,6 +24,8 @@
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
+      // Mark the phi dead before scanning its uses: a phi with no uses would
+      // otherwise keep its default state.
+      phi->SetDead();
       for (HUseIterator<HInstruction> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
         HUseListNode<HInstruction>* current = use_it.Current();
         HInstruction* user = current->GetUser();
@@ -31,8 +33,6 @@
           worklist_.Add(phi);
           phi->SetLive();
           break;
-        } else {
-          phi->SetDead();
         }
       }
     }
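The hunk above fixes an initialization bug: SetDead() used to live in the else branch of the use loop, so a phi whose use list was empty never executed either branch and kept its default (live) state. Initializing to dead before the scan makes the loop purely an upgrade to live. The pattern, in isolation:

#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> uses;  // This phi has no uses at all.
  bool live = false;            // Assume dead up front (the fix).
  for (int use : uses) {
    if (use >= 0) {             // Stand-in for "the user is not a phi".
      live = true;
      break;
    }
  }
  std::printf("phi is %s\n", live ? "live" : "dead");  // Prints "dead".
  return 0;
}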
@@ -65,8 +65,8 @@
                use_it.Advance()) {
             HUseListNode<HInstruction>* user_node = use_it.Current();
             HInstruction* user = user_node->GetUser();
-            DCHECK(user->IsLoopHeaderPhi());
-            DCHECK(user->AsPhi()->IsDead());
+            DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
+            DCHECK(user->AsPhi()->IsDead()) << user->GetId();
             // Just put itself as an input. The phi will be removed in this loop anyway.
             user->SetRawInputAt(user_node->GetIndex(), user);
             current->RemoveUser(user, user_node->GetIndex());
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index fffe5c2..6174dd4 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -199,29 +199,31 @@
   // Test that we create a phi for an initialized local at entry of a loop.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: IntConstant 0 [6, 4, 2, 2]\n"
-    "  1: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 5, 6\n"
-    "  2: Equal(0, 0) [3]\n"
-    "  3: If(2)\n"
-    "BasicBlock 2, pred: 3, 6, succ: 3\n"
-    "  4: Phi(6, 0) [6]\n"
+    "  0: IntConstant 0 [6, 3, 3]\n"
+    "  1: IntConstant 4 [6]\n"
+    "  2: Goto\n"
+    "BasicBlock 1, pred: 0, succ: 4, 2\n"
+    "  3: Equal(0, 0) [4]\n"
+    "  4: If(3)\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 5, 2, succ: 2\n"
-    "  6: Phi(0, 4) [4]\n"
+    "BasicBlock 3, pred: 2, 4, succ: 5\n"
+    "  6: Phi(1, 0) [9]\n"
     "  7: Goto\n"
-    "BasicBlock 4\n"
-    // Synthesized blocks to avoid critical edge.
-    "BasicBlock 5, pred: 1, succ: 3\n"
+    "BasicBlock 4, pred: 1, succ: 3\n"
     "  8: Goto\n"
-    "BasicBlock 6, pred: 1, succ: 2\n"
-    "  9: Goto\n";
+    "BasicBlock 5, pred: 3, succ: 6\n"
+    "  9: Return(6)\n"
+    "BasicBlock 6, pred: 5\n"
+    "  10: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
-    Instruction::IF_EQ, 3,
-    Instruction::GOTO | 0x100,
-    Instruction::GOTO | 0xFF00);
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::GOTO | 0xFF00,
+    Instruction::RETURN | 0 << 8);
 
   TestCode(data, expected);
 }
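The expected graph needed rewriting because the SSA builder now runs SsaDeadPhiElimination itself: phis kept alive only by environments no longer survive to the dump. The new bytecode therefore returns the merged value, giving phi 6 a real use (instruction 9) that keeps it in the printed graph.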
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 9c84bc1..0f28591 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -205,10 +205,9 @@
       encoding =  am_ | offset_;
     }
   } else {
-    uint32_t imm5 = offset_;
     uint32_t shift = shift_;
     if (shift == RRX) {
-      imm5 = 0;
+      CHECK_EQ(offset_, 0);
       shift = ROR;
     }
     encoding = am_ | static_cast<uint32_t>(rm_) | shift << 5 | offset_ << 7 | B25;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a1594b0..a541763 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1079,7 +1079,7 @@
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
   } else {
@@ -1090,7 +1090,7 @@
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
@@ -1102,7 +1102,7 @@
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
@@ -1114,7 +1114,7 @@
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
   } else {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a349209..a377cb2 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2210,7 +2210,7 @@
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, LSL, shift_imm, setcc);
 }
@@ -2218,7 +2218,7 @@
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, LSR, shift_imm, setcc);
@@ -2227,7 +2227,7 @@
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, ASR, shift_imm, setcc);
@@ -2236,7 +2236,7 @@
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, ROR, shift_imm, setcc);
 }
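The relaxed checks in both assemblers now admit a shift of zero for LSL and spell out the architectural immediate ranges: LSL takes 0-31, LSR/ASR take 1-32 (an encoded immediate of 0 means 32, hence the UAL fix-ups above), and ROR takes 1-31 (an encoded 0 selects RRX instead). A sketch of that encoding rule, under those assumptions:

#include <cassert>
#include <cstdio>

enum Shift { LSL, LSR, ASR, ROR };

unsigned EncodeShiftImm(Shift kind, unsigned imm) {
  switch (kind) {
    case LSL:
      assert(imm <= 31u);
      return imm;
    case LSR:
    case ASR:
      assert(1u <= imm && imm <= 32u);
      return imm == 32u ? 0u : imm;  // UAL: a 32-bit shift is encoded as 0.
    case ROR:
      assert(1u <= imm && imm <= 31u);  // An encoded 0 would mean RRX.
      return imm;
  }
  return 0u;  // Unreachable.
}

int main() {
  std::printf("LSR #32 encodes as imm5 = %u\n", EncodeShiftImm(LSR, 32u));  // 0
  return 0;
}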
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 1fadb91..2b55120 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -226,6 +226,10 @@
     UNREACHABLE();
   }
 
+  std::string GetRegisterName(const Reg& reg) {
+    return GetRegName<RegisterView::kUsePrimaryName>(reg);
+  }
+
  protected:
   explicit AssemblerTest() {}
 
diff --git a/compiler/utils/dex_instruction_utils.h b/compiler/utils/dex_instruction_utils.h
new file mode 100644
index 0000000..2c6e525
--- /dev/null
+++ b/compiler/utils/dex_instruction_utils.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
+#define ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
+
+#include "dex_instruction.h"
+
+namespace art {
+
+// Dex invoke type corresponds to the ordering of INVOKE instructions;
+// this order is the same for range and non-range invokes.
+enum DexInvokeType : uint8_t {
+  kDexInvokeVirtual = 0,  // invoke-virtual, invoke-virtual-range
+  kDexInvokeSuper,        // invoke-super, invoke-super-range
+  kDexInvokeDirect,       // invoke-direct, invoke-direct-range
+  kDexInvokeStatic,       // invoke-static, invoke-static-range
+  kDexInvokeInterface,    // invoke-interface, invoke-interface-range
+  kDexInvokeTypeCount
+};
+
+// Dex instruction memory access types correspond to the ordering of GET/PUT instructions;
+// this order is the same for IGET, IPUT, SGET, SPUT, AGET and APUT.
+enum DexMemAccessType : uint8_t {
+  kDexMemAccessWord = 0,  // op         0; int or float, the actual type is not encoded.
+  kDexMemAccessWide,      // op_WIDE    1; long or double, the actual type is not encoded.
+  kDexMemAccessObject,    // op_OBJECT  2; the actual reference type is not encoded.
+  kDexMemAccessBoolean,   // op_BOOLEAN 3
+  kDexMemAccessByte,      // op_BYTE    4
+  kDexMemAccessChar,      // op_CHAR    5
+  kDexMemAccessShort,     // op_SHORT   6
+  kDexMemAccessTypeCount
+};
+
+std::ostream& operator<<(std::ostream& os, const DexMemAccessType& type);
+
+// NOTE: The following functions disregard quickened instructions.
+
+constexpr bool IsInstructionReturn(Instruction::Code opcode) {
+  return Instruction::RETURN_VOID <= opcode && opcode <= Instruction::RETURN_OBJECT;
+}
+
+constexpr bool IsInstructionInvoke(Instruction::Code opcode) {
+  return Instruction::INVOKE_VIRTUAL <= opcode && opcode <= Instruction::INVOKE_INTERFACE_RANGE &&
+      opcode != Instruction::RETURN_VOID_BARRIER;
+}
+
+constexpr bool IsInstructionInvokeStatic(Instruction::Code opcode) {
+  return opcode == Instruction::INVOKE_STATIC || opcode == Instruction::INVOKE_STATIC_RANGE;
+}
+
+constexpr bool IsInstructionGoto(Instruction::Code opcode) {
+  return Instruction::GOTO <= opcode && opcode <= Instruction::GOTO_32;
+}
+
+constexpr bool IsInstructionIfCc(Instruction::Code opcode) {
+  return Instruction::IF_EQ <= opcode && opcode <= Instruction::IF_LE;
+}
+
+constexpr bool IsInstructionIfCcZ(Instruction::Code opcode) {
+  return Instruction::IF_EQZ <= opcode && opcode <= Instruction::IF_LEZ;
+}
+
+constexpr bool IsInstructionIGet(Instruction::Code code) {
+  return Instruction::IGET <= code && code <= Instruction::IGET_SHORT;
+}
+
+constexpr bool IsInstructionIPut(Instruction::Code code) {
+  return Instruction::IPUT <= code && code <= Instruction::IPUT_SHORT;
+}
+
+constexpr bool IsInstructionSGet(Instruction::Code code) {
+  return Instruction::SGET <= code && code <= Instruction::SGET_SHORT;
+}
+
+constexpr bool IsInstructionSPut(Instruction::Code code) {
+  return Instruction::SPUT <= code && code <= Instruction::SPUT_SHORT;
+}
+
+constexpr bool IsInstructionAGet(Instruction::Code code) {
+  return Instruction::AGET <= code && code <= Instruction::AGET_SHORT;
+}
+
+constexpr bool IsInstructionAPut(Instruction::Code code) {
+  return Instruction::APUT <= code && code <= Instruction::APUT_SHORT;
+}
+
+constexpr bool IsInstructionIGetOrIPut(Instruction::Code code) {
+  return Instruction::IGET <= code && code <= Instruction::IPUT_SHORT;
+}
+
+constexpr bool IsInstructionSGetOrSPut(Instruction::Code code) {
+  return Instruction::SGET <= code && code <= Instruction::SPUT_SHORT;
+}
+
+constexpr bool IsInstructionAGetOrAPut(Instruction::Code code) {
+  return Instruction::AGET <= code && code <= Instruction::APUT_SHORT;
+}
+
+// TODO: Remove the #if guards below when we fully migrate to C++14.
+
+constexpr bool IsInvokeInstructionRange(Instruction::Code opcode) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionInvoke(opcode));
+#endif
+  return opcode >= Instruction::INVOKE_VIRTUAL_RANGE;
+}
+
+constexpr DexInvokeType InvokeInstructionType(Instruction::Code opcode) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionInvoke(opcode));
+#endif
+  return static_cast<DexInvokeType>(IsInvokeInstructionRange(opcode)
+                                    ? (opcode - Instruction::INVOKE_VIRTUAL_RANGE)
+                                    : (opcode - Instruction::INVOKE_VIRTUAL));
+}
+
+constexpr DexMemAccessType IGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::IGET);
+}
+
+constexpr DexMemAccessType IPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::IPUT);
+}
+
+constexpr DexMemAccessType SGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::SGET);
+}
+
+constexpr DexMemAccessType SPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::SPUT);
+}
+
+constexpr DexMemAccessType AGetMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAGet(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::AGET);
+}
+
+constexpr DexMemAccessType APutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAPut(code));
+#endif
+  return static_cast<DexMemAccessType>(code - Instruction::APUT);
+}
+
+constexpr DexMemAccessType IGetOrIPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionIGetOrIPut(code));
+#endif
+  return (code >= Instruction::IPUT) ? IPutMemAccessType(code) : IGetMemAccessType(code);
+}
+
+constexpr DexMemAccessType SGetOrSPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionSGetOrSPut(code));
+#endif
+  return (code >= Instruction::SPUT) ? SPutMemAccessType(code) : SGetMemAccessType(code);
+}
+
+constexpr DexMemAccessType AGetOrAPutMemAccessType(Instruction::Code code) {
+#if __cplusplus >= 201402  // C++14 allows the DCHECK() in constexpr functions.
+  DCHECK(IsInstructionAGetOrAPut(code));
+#endif
+  return (code >= Instruction::APUT) ? APutMemAccessType(code) : AGetMemAccessType(code);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DEX_INSTRUCTION_UTILS_H_
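All of the conversion functions above lean on one property of the DEX opcode map: each GET/PUT family is laid out contiguously, in exactly the DexMemAccessType order, so a subtraction recovers the access type. Illustrated with the IGET family (opcode values shown for illustration; they follow the published DEX layout):

#include <cstdio>

enum Code {
  IGET = 0x52, IGET_WIDE, IGET_OBJECT, IGET_BOOLEAN,
  IGET_BYTE, IGET_CHAR, IGET_SHORT,
};
enum MemAccessType { kWord = 0, kWide, kObject, kBoolean, kByte, kChar, kShort };

static_assert(IGET_OBJECT - IGET == kObject, "family must mirror the access-type order");

int main() {
  // Same arithmetic as IGetMemAccessType(): opcode minus the family base.
  std::printf("IGET_CHAR -> access type %d (kChar)\n", IGET_CHAR - IGET);  // 5
  return 0;
}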
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index afa4a3b..a297ea3 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1126,7 +1126,8 @@
 }
 
 
-void X86Assembler::shld(Register dst, Register src) {
+void X86Assembler::shld(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0xA5);
@@ -1134,6 +1135,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAD);
+  EmitRegisterOperand(src, dst);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8aed934..6ea66a5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -405,7 +405,8 @@
   void shrl(Register operand, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
-  void shld(Register dst, Register src);
+  void shld(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, Register shifter);
 
   void negl(Register reg);
   void notl(Register reg);
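shld/shrd are the x86 double-precision shifts (the 0x0F 0xA5 and 0x0F 0xAD encodings emitted above, with an implicit CL count), which is why the new signatures pin the shifter to ECX: they let a 64-bit value held in a hi/lo register pair be shifted on 32-bit x86. The semantics, spelled out in plain C++ (a sketch of what the instruction computes, not ART's code generator):

#include <cstdint>
#include <cstdio>

int main() {
  // The 64-bit value 0x00000001'80000001 held as a hi/lo pair of 32-bit regs.
  uint32_t hi = 0x00000001u, lo = 0x80000001u;
  const unsigned n = 4;  // Shift count, as the CL register would hold it.
  hi = (hi << n) | (lo >> (32 - n));  // shld hi, lo: bits flow in from lo.
  lo <<= n;                           // shl lo completes the 64-bit shift.
  std::printf("hi=0x%08x lo=0x%08x\n", hi, lo);  // hi=0x00000018 lo=0x00000010
  return 0;
}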
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 8c428f4..dff3849 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1451,8 +1451,18 @@
 }
 
 
+void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 4, reg, imm);
+}
+
+
 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(false, 4, operand, shifter);
+}
+
+
+void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 4, operand, shifter);
 }
 
 
@@ -1467,7 +1477,12 @@
 
 
 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(false, 5, operand, shifter);
+}
+
+
+void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 5, operand, shifter);
 }
 
 
@@ -1477,7 +1492,17 @@
 
 
 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(false, 7, operand, shifter);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 7, reg, imm);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 7, operand, shifter);
 }
 
 
@@ -1826,12 +1851,17 @@
 }
 
 
-void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
+void X86_64Assembler::EmitGenericShift(bool wide,
+                                       int reg_or_opcode,
                                        CpuRegister operand,
                                        CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
-  EmitOptionalRex32(operand);
+  if (wide) {
+    EmitRex64(operand);
+  } else {
+    EmitOptionalRex32(operand);
+  }
   EmitUint8(0xD3);
   EmitOperand(reg_or_opcode, Operand(operand));
 }
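The wide flag threads down to a single decision: emit the REX.W prefix (0x48 for an operand in the low register set) before the 0xD3 shift-group opcode. Hand-assembling "shl %cl, %rax" versus "shl %cl, %eax" shows the difference; a sketch of the byte layout, where ModRM 0xE0 is reg field /4 (SHL) with rax/eax as the operand:

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> EncodeShlByCl(bool wide) {
  std::vector<uint8_t> bytes;
  if (wide) bytes.push_back(0x48);  // REX.W: 64-bit operand size.
  bytes.push_back(0xD3);            // Shift r/m by the CL register.
  bytes.push_back(0xE0);            // ModRM: mod=11, reg=/4 (SHL), rm=rax/eax.
  return bytes;
}

int main() {
  for (uint8_t b : EncodeShlByCl(true)) std::printf("%02x ", b);   // 48 d3 e0
  std::printf(" <- shl %%cl, %%rax\n");
  for (uint8_t b : EncodeShlByCl(false)) std::printf("%02x ", b);  // d3 e0
  std::printf(" <- shl %%cl, %%eax\n");
  return 0;
}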
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4dd70e2..ab1bc9e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -460,7 +460,12 @@
   void sarl(CpuRegister reg, const Immediate& imm);
   void sarl(CpuRegister operand, CpuRegister shifter);
 
+  void shlq(CpuRegister reg, const Immediate& imm);
+  void shlq(CpuRegister operand, CpuRegister shifter);
   void shrq(CpuRegister reg, const Immediate& imm);
+  void shrq(CpuRegister operand, CpuRegister shifter);
+  void sarq(CpuRegister reg, const Immediate& imm);
+  void sarq(CpuRegister operand, CpuRegister shifter);
 
   void negl(CpuRegister reg);
   void negq(CpuRegister reg);
@@ -657,7 +662,7 @@
   void EmitNearLabelLink(Label* label);
 
   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
-  void EmitGenericShift(int rm, CpuRegister operand, CpuRegister shifter);
+  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
 
   // If any input is not false, output the necessary rex prefix.
   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index af389e6..14a98b9 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -296,7 +296,7 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::subl, 4U, "sub ${imm}, %{reg}"), "subli");
 }
 
-// Shll only allows CL as the shift register.
+// Shll only allows CL as the shift count.
 std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -319,7 +319,31 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shll, 1U, "shll ${imm}, %{reg}"), "shlli");
 }
 
-// Shrl only allows CL as the shift register.
+// Shlq only allows CL as the shift count.
+std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shlq(*reg, shifter);
+    str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShlqReg) {
+  DriverFn(&shlq_fn, "shlq");
+}
+
+TEST_F(AssemblerX86_64Test, ShlqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shlq, 1U, "shlq ${imm}, %{reg}"), "shlqi");
+}
+
+// Shrl only allows CL as the shift count.
 std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -342,7 +366,30 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shrl, 1U, "shrl ${imm}, %{reg}"), "shrli");
 }
 
-// Sarl only allows CL as the shift register.
+// Shrq only allows CL as the shift count.
+std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shrq(*reg, shifter);
+    str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShrqReg) {
+  DriverFn(&shrq_fn, "shrq");
+}
+
+TEST_F(AssemblerX86_64Test, ShrqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shrq, 1U, "shrq ${imm}, %{reg}"), "shrqi");
+}
+
+// Sarl only allows CL as the shift count.
 std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -365,6 +412,29 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::sarl, 1U, "sarl ${imm}, %{reg}"), "sarli");
 }
 
+// Sarq only allows CL as the shift count.
+std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->sarq(*reg, shifter);
+    str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, SarqReg) {
+  DriverFn(&sarq_fn, "sarq");
+}
+
+TEST_F(AssemblerX86_64Test, SarqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, 1U, "sarq ${imm}, %{reg}"), "sarqi");
+}
+
 TEST_F(AssemblerX86_64Test, CmpqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
 }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 08352de..d28b626 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -605,7 +605,7 @@
     offsets_.insert(code_offset);
     offsets_.insert(oat_method.GetMappingTableOffset());
     offsets_.insert(oat_method.GetVmapTableOffset());
-    offsets_.insert(oat_method.GetNativeGcMapOffset());
+    offsets_.insert(oat_method.GetGcMapOffset());
   }
 
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
@@ -755,9 +755,9 @@
 
       *indent2_os << "gc_map: ";
       if (options_->absolute_addresses_) {
-        *indent2_os << StringPrintf("%p ", oat_method.GetNativeGcMap());
+        *indent2_os << StringPrintf("%p ", oat_method.GetGcMap());
       }
-      uint32_t gc_map_offset = oat_method.GetNativeGcMapOffset();
+      uint32_t gc_map_offset = oat_method.GetGcMapOffset();
       *indent2_os << StringPrintf("(offset=0x%08x)\n", gc_map_offset);
       if (gc_map_offset > oat_file_.Size()) {
         *indent2_os << StringPrintf("WARNING: "
@@ -935,7 +935,7 @@
     // If the native GC map is null, then this method has been compiled with the
     // optimizing compiler. The optimizing compiler currently outputs its stack map
     // in the vmap table, and the code below does not work with such a stack map.
-    if (oat_method.GetNativeGcMap() == nullptr) {
+    if (oat_method.GetGcMap() == nullptr) {
       return;
     }
     const uint8_t* raw_table = oat_method.GetVmapTable();
@@ -1050,7 +1050,7 @@
   }
   void DumpGcMap(std::ostream& os, const OatFile::OatMethod& oat_method,
                  const DexFile::CodeItem* code_item) {
-    const uint8_t* gc_map_raw = oat_method.GetNativeGcMap();
+    const uint8_t* gc_map_raw = oat_method.GetGcMap();
     if (gc_map_raw == nullptr) {
       return;  // No GC map.
     }
@@ -1128,7 +1128,7 @@
 
   void DumpGcMapAtNativePcOffset(std::ostream& os, const OatFile::OatMethod& oat_method,
                                  const DexFile::CodeItem* code_item, size_t native_pc_offset) {
-    const uint8_t* gc_map_raw = oat_method.GetNativeGcMap();
+    const uint8_t* gc_map_raw = oat_method.GetGcMap();
     if (gc_map_raw != nullptr) {
       NativePcOffsetToReferenceMap map(gc_map_raw);
       if (map.HasEntry(native_pc_offset)) {
@@ -1634,8 +1634,7 @@
       mirror::ArtMethod* method = obj->AsArtMethod();
       if (method->IsNative()) {
         // TODO: portable dumping.
-        DCHECK(method->GetNativeGcMapPtrSize(image_pointer_size) == nullptr)
-            << PrettyMethod(method);
+        DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
         DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
         bool first_occurrence;
         const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
@@ -1651,8 +1650,7 @@
       } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
           method->IsResolutionMethod() || method->IsImtConflictMethod() ||
           method->IsImtUnimplementedMethod() || method->IsClassInitializer()) {
-        DCHECK(method->GetNativeGcMapPtrSize(image_pointer_size) == nullptr)
-            << PrettyMethod(method);
+        DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
         DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
       } else {
         const DexFile::CodeItem* code_item = method->GetCodeItem();
@@ -1660,8 +1658,8 @@
         state->stats_.dex_instruction_bytes += dex_instruction_bytes;
 
         bool first_occurrence;
-        size_t gc_map_bytes = state->ComputeOatSize(
-            method->GetNativeGcMapPtrSize(image_pointer_size), &first_occurrence);
+        size_t gc_map_bytes =
+            state->ComputeOatSize(method->GetNativeGcMap(image_pointer_size), &first_occurrence);
         if (first_occurrence) {
           state->stats_.gc_map_bytes += gc_map_bytes;
         }
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index b15c712..68fd15b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -565,12 +565,6 @@
     copy->SetEntryPointFromJniPtrSize(reinterpret_cast<void*>(native_method + delta_),
                                       pointer_size);
   }
-
-  uintptr_t native_gc_map = reinterpret_cast<uintptr_t>(
-      object->GetNativeGcMapPtrSize(pointer_size));
-  if (native_gc_map != 0) {
-    copy->SetNativeGcMapPtrSize(reinterpret_cast<uint8_t*>(native_gc_map + delta_), pointer_size);
-  }
 }
 
 bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
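This deletion is the patchoat side of removing the native GC map pointer from mirror::ArtMethod: once the method object no longer stores a pointer into the oat file for its GC map, there is nothing for image relocation to adjust, and the block above was dead weight.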
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 4b4c8855..c737ec9 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -148,7 +148,7 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
             art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
 
-#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32     (48 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32     (44 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32,
             art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset(4).Int32Value())
 
@@ -156,7 +156,7 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
             art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
 
-#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64     (64 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64     (56 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64,
             art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset(8).Int32Value())
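Both portable entry point offsets drop by exactly one pointer width (48 - 4 = 44 for 32-bit, 64 - 8 = 56 for 64-bit), the expected knock-on effect of removing one pointer-sized field, the native GC map, from mirror::ArtMethod ahead of the entry point fields.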
 
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 821d613..4fe3852 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -98,7 +98,7 @@
   void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
-    NativePcOffsetToReferenceMap map(m->GetNativeGcMap());
+    NativePcOffsetToReferenceMap map(m->GetNativeGcMap(sizeof(void*)));
     const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
     CHECK(ref_bitmap);
     for (int i = 0; i < number_of_references; ++i) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index e1b79c9..677fdc9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -5737,24 +5737,13 @@
   }
 }
 
-static OatFile::OatMethod CreateOatMethod(const void* code, const uint8_t* gc_map,
-                                          bool is_portable) {
+static OatFile::OatMethod CreateOatMethod(const void* code, bool is_portable) {
   CHECK_EQ(kUsePortableCompiler, is_portable);
   CHECK(code != nullptr);
-  const uint8_t* base;
-  uint32_t code_offset, gc_map_offset;
-  if (gc_map == nullptr) {
-    base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
-    base -= sizeof(void*);  // Move backward so that code_offset != 0.
-    code_offset = sizeof(void*);
-    gc_map_offset = 0;
-  } else {
-    // TODO: 64bit support.
-    base = nullptr;  // Base of data in oat file, ie 0.
-    code_offset = PointerToLowMemUInt32(code);
-    gc_map_offset = PointerToLowMemUInt32(gc_map);
-  }
-  return OatFile::OatMethod(base, code_offset, gc_map_offset);
+  const uint8_t* base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
+  base -= sizeof(void*);  // Move backward so that code_offset != 0.
+  const uint32_t code_offset = sizeof(void*);
+  return OatFile::OatMethod(base, code_offset);
 }
 
 bool ClassLinker::IsPortableResolutionStub(const void* entry_point) const {
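With the GC map parameter gone, CreateOatMethod keeps only one pointer trick worth pausing on: OatMethod treats a zero code offset as "absent", so the base is biased one pointer back from the code to guarantee a non-zero offset. The bookkeeping, using integer arithmetic to keep the sketch well defined:

#include <cstdint>
#include <cstdio>

int main() {
  alignas(void*) static const uint8_t fake_code[16] = {0xc3};  // Placeholder "code".
  const uintptr_t code = reinterpret_cast<uintptr_t>(fake_code);
  const uintptr_t base = code - sizeof(void*);   // Biased base pointer.
  const uint32_t code_offset = sizeof(void*);    // Therefore never zero.
  std::printf("base + offset == code: %s\n",
              (base + code_offset == code) ? "yes" : "no");  // yes
  return 0;
}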
@@ -5788,7 +5777,7 @@
 
 void ClassLinker::SetEntryPointsToCompiledCode(mirror::ArtMethod* method, const void* method_code,
                                                bool is_portable) const {
-  OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr, is_portable);
+  OatFile::OatMethod oat_method = CreateOatMethod(method_code, is_portable);
   oat_method.LinkMethod(method);
   method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
   // Create bridges to transition between different kinds of compiled bridge.
@@ -5808,7 +5797,7 @@
     method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
   } else {
     const void* quick_method_code = GetQuickGenericJniStub();
-    OatFile::OatMethod oat_method = CreateOatMethod(quick_method_code, nullptr, false);
+    OatFile::OatMethod oat_method = CreateOatMethod(quick_method_code, false);
     oat_method.LinkMethod(method);
     method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
     method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index ef5db2d..49b132d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -31,7 +31,7 @@
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
 #include "jdwp/object_registry.h"
-#include "method_helper.h"
+#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 1a8ca02..67265a2 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -37,9 +37,9 @@
 
 template <const bool kAccessCheck>
 ALWAYS_INLINE
-static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
-                                              mirror::ArtMethod* method,
-                                              Thread* self, bool* slow_path) {
+inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+                                       mirror::ArtMethod* method,
+                                       Thread* self, bool* slow_path) {
   mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx);
   if (UNLIKELY(klass == NULL)) {
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
@@ -90,9 +90,9 @@
 }
 
 ALWAYS_INLINE
-static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
-                                                                 Thread* self,
-                                                                 bool* slow_path) {
+inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                          Thread* self,
+                                                          bool* slow_path) {
   if (UNLIKELY(!klass->IsInitialized())) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> h_class(hs.NewHandle(klass));
@@ -120,10 +120,10 @@
 // check.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
-                                                  mirror::ArtMethod* method,
-                                                  Thread* self,
-                                                  gc::AllocatorType allocator_type) {
+inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+                                           mirror::ArtMethod* method,
+                                           Thread* self,
+                                           gc::AllocatorType allocator_type) {
   bool slow_path = false;
   mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path);
   if (UNLIKELY(slow_path)) {
@@ -139,9 +139,9 @@
 // Given the context of a calling Method and a resolved class, create an instance.
 template <bool kInstrumented>
 ALWAYS_INLINE
-static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
-                                                          Thread* self,
-                                                          gc::AllocatorType allocator_type) {
+inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
+                                                   Thread* self,
+                                                   gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
   bool slow_path = false;
   klass = CheckClassInitializedForObjectAlloc(klass, self, &slow_path);
@@ -160,9 +160,9 @@
 // Given the context of a calling Method and an initialized class, create an instance.
 template <bool kInstrumented>
 ALWAYS_INLINE
-static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
-                                                             Thread* self,
-                                                             gc::AllocatorType allocator_type) {
+inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
+                                                      Thread* self,
+                                                      gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
   // Pass in false since the object can not be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
@@ -171,10 +171,10 @@
 
 template <bool kAccessCheck>
 ALWAYS_INLINE
-static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
-                                             mirror::ArtMethod* method,
-                                             int32_t component_count,
-                                             bool* slow_path) {
+inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+                                      mirror::ArtMethod* method,
+                                      int32_t component_count,
+                                      bool* slow_path) {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
     *slow_path = true;
@@ -207,11 +207,11 @@
 // check.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
-                                                mirror::ArtMethod* method,
-                                                int32_t component_count,
-                                                Thread* self,
-                                                gc::AllocatorType allocator_type) {
+inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+                                         mirror::ArtMethod* method,
+                                         int32_t component_count,
+                                         Thread* self,
+                                         gc::AllocatorType allocator_type) {
   bool slow_path = false;
   mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count,
                                                        &slow_path);
@@ -230,11 +230,11 @@
 
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
-                                                        mirror::ArtMethod* method,
-                                                        int32_t component_count,
-                                                        Thread* self,
-                                                        gc::AllocatorType allocator_type) {
+inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
+                                                 mirror::ArtMethod* method,
+                                                 int32_t component_count,
+                                                 Thread* self,
+                                                 gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
@@ -254,8 +254,8 @@
 }
 
 template<FindFieldType type, bool access_check>
-static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                                  Thread* self, size_t expected_size) {
+inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                           Thread* self, size_t expected_size) {
   bool is_primitive;
   bool is_set;
   bool is_static;
@@ -349,9 +349,9 @@
 #undef EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL
 
 template<InvokeType type, bool access_check>
-static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                                    mirror::Object** this_object,
-                                                    mirror::ArtMethod** referrer, Thread* self) {
+inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
+                                             mirror::Object** this_object,
+                                             mirror::ArtMethod** referrer, Thread* self) {
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   mirror::ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, *referrer);
   if (resolved_method == nullptr) {
@@ -475,9 +475,9 @@
 #undef EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
-static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
-                                              mirror::ArtMethod* referrer,
-                                              FindFieldType type, size_t expected_size) {
+inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
+                                       mirror::ArtMethod* referrer,
+                                       FindFieldType type, size_t expected_size) {
   mirror::ArtField* resolved_field =
       referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
   if (UNLIKELY(resolved_field == nullptr)) {
@@ -528,10 +528,10 @@
 }
 
 // Fast path method resolution that can't throw exceptions.
-static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
-                                                mirror::Object* this_object,
-                                                mirror::ArtMethod* referrer,
-                                                bool access_check, InvokeType type) {
+inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
+                                         mirror::Object* this_object,
+                                         mirror::ArtMethod* referrer,
+                                         bool access_check, InvokeType type) {
   if (UNLIKELY(this_object == NULL && type != kStatic)) {
     return NULL;
   }
@@ -568,7 +568,7 @@
   }
 }
 
-static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
+inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
-                                                    mirror::ArtMethod* referrer,
-                                                    Thread* self, bool can_run_clinit,
-                                                    bool verify_access) {
+                                             mirror::ArtMethod* referrer,
+                                             Thread* self, bool can_run_clinit,
+                                             bool verify_access) {
@@ -604,13 +604,13 @@
   return h_class.Get();
 }
 
-static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
-                                                    uint32_t string_idx) {
+inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
+                                             uint32_t string_idx) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveString(string_idx, referrer);
 }
 
-static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) {
+inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) {
   // Save any pending exception over monitor exit call.
   mirror::Throwable* saved_exception = NULL;
   ThrowLocation saved_throw_location;
@@ -635,7 +635,7 @@
 }
 
 template <typename INT_TYPE, typename FLOAT_TYPE>
-static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f) {
+inline INT_TYPE art_float_to_integral(FLOAT_TYPE f) {
   const INT_TYPE kMaxInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::max());
   const INT_TYPE kMinInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::min());
   const FLOAT_TYPE kMaxIntAsFloat = static_cast<FLOAT_TYPE>(kMaxInt);
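The only change in this file is dropping static from the definitions (with matching declarations in entrypoint_utils.h below). At namespace scope, static gives a function internal linkage, so every translation unit including the -inl.h carries its own private copy, along with its own "defined but not used" warnings; a plain inline function keeps external linkage and lets the linker fold the duplicate definitions, which is the usual motivation for this cleanup. In miniature:

// Header-like snippet, imagined included from many .cc files:
inline int Twice(int x) { return 2 * x; }   // One logical function, ODR-merged.
static int Thrice(int x) { return 3 * x; }  // A distinct copy per translation unit.

int main() { return Twice(2) + Thrice(1); }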
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 311cafa..0531122 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -42,13 +42,14 @@
 class Thread;
 
 template <const bool kAccessCheck>
-ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
-                                                            mirror::ArtMethod* method,
-                                                            Thread* self, bool* slow_path)
+ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+                                                     mirror::ArtMethod* method,
+                                                     Thread* self, bool* slow_path)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-ALWAYS_INLINE static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
-                                                                               Thread* self, bool* slow_path)
+ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                                        Thread* self,
+                                                                        bool* slow_path)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
@@ -56,32 +57,32 @@
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 template <bool kAccessCheck, bool kInstrumented>
-ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
-                                                                mirror::ArtMethod* method,
-                                                                Thread* self,
-                                                                gc::AllocatorType allocator_type)
+ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+                                                         mirror::ArtMethod* method,
+                                                         Thread* self,
+                                                         gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and a resolved class, create an instance.
 template <bool kInstrumented>
-ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
-                                                                        Thread* self,
-                                                                        gc::AllocatorType allocator_type)
+ALWAYS_INLINE inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
+                                                                 Thread* self,
+                                                                 gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and an initialized class, create an instance.
 template <bool kInstrumented>
-ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
-                                                                           Thread* self,
-                                                                           gc::AllocatorType allocator_type)
+ALWAYS_INLINE inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
+                                                                    Thread* self,
+                                                                    gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
 template <bool kAccessCheck>
-ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
-                                                           mirror::ArtMethod* method,
-                                                           int32_t component_count,
-                                                           bool* slow_path)
+ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+                                                    mirror::ArtMethod* method,
+                                                    int32_t component_count,
+                                                    bool* slow_path)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
@@ -89,19 +90,19 @@
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 template <bool kAccessCheck, bool kInstrumented>
-ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
-                                                              mirror::ArtMethod* method,
-                                                              int32_t component_count,
-                                                              Thread* self,
-                                                              gc::AllocatorType allocator_type)
+ALWAYS_INLINE inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+                                                       mirror::ArtMethod* method,
+                                                       int32_t component_count,
+                                                       Thread* self,
+                                                       gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 template <bool kAccessCheck, bool kInstrumented>
-ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
-                                                                      mirror::ArtMethod* method,
-                                                                      int32_t component_count,
-                                                                      Thread* self,
-                                                                      gc::AllocatorType allocator_type)
+ALWAYS_INLINE inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
+                                                               mirror::ArtMethod* method,
+                                                               int32_t component_count,
+                                                               Thread* self,
+                                                               gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
@@ -130,43 +131,42 @@
 };
 
 template<FindFieldType type, bool access_check>
-static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                                  Thread* self, size_t expected_size)
+inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                           Thread* self, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 template<InvokeType type, bool access_check>
-static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                                    mirror::Object** this_object,
-                                                    mirror::ArtMethod** referrer, Thread* self)
+inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
+                                             mirror::Object** this_object,
+                                             mirror::ArtMethod** referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
-static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
-                                              mirror::ArtMethod* referrer,
-                                              FindFieldType type, size_t expected_size)
+inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
+                                       mirror::ArtMethod* referrer,
+                                       FindFieldType type, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Fast path method resolution that can't throw exceptions.
-static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
-                                                mirror::Object* this_object,
-                                                mirror::ArtMethod* referrer,
-                                                bool access_check, InvokeType type)
+inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
+                                         mirror::Object* this_object,
+                                         mirror::ArtMethod* referrer,
+                                         bool access_check, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
-                                                    mirror::ArtMethod* referrer,
-                                                    Thread* self, bool can_run_clinit,
-                                                    bool verify_access)
+inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
+                                             mirror::ArtMethod* referrer,
+                                             Thread* self, bool can_run_clinit,
+                                             bool verify_access)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
-                                                    uint32_t string_idx)
+inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer, uint32_t string_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // TODO: annotalysis disabled as monitor semantics are maintained in Java code.
-static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
+inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS;
 
 void CheckReferenceResult(mirror::Object* o, Thread* self)
@@ -181,7 +181,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 template <typename INT_TYPE, typename FLOAT_TYPE>
-static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f);
+inline INT_TYPE art_float_to_integral(FLOAT_TYPE f);
 
 }  // namespace art
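
One note on the mechanical `static inline` -> `inline` change running through this header: at namespace scope, `static inline` gives every including translation unit its own internal-linkage copy of the function, while plain `inline` keeps external linkage and lets the linker merge the definitions into one logical entity. A toy illustration (hypothetical names):

// some_header.h (illustrative):
inline int Twice(int x) { return 2 * x; }          // External linkage: one
                                                   // function program-wide.
static inline int Thrice(int x) { return 3 * x; }  // Internal linkage: a fresh
                                                   // copy per translation unit.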
 
diff --git a/runtime/entrypoints/portable/portable_thread_entrypoints.cc b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
index ecbc65e..95ac66c 100644
--- a/runtime/entrypoints/portable/portable_thread_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
@@ -34,7 +34,7 @@
       uint32_t dex_pc = cur_frame->GetDexPC();
       ShadowFrame* new_frame = ShadowFrame::Create(num_regs, NULL, method, dex_pc);
 
-      const uint8_t* gc_map = method->GetNativeGcMap();
+      const uint8_t* gc_map = method->GetNativeGcMap(sizeof(void*));
       verifier::DexPcToReferenceMap dex_gc_map(gc_map);
       const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
       for (size_t reg = 0; reg < num_regs; ++reg) {
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index ee9b221..580b541 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -76,7 +76,8 @@
     const std::vector<uint8_t>& fake_mapping_data = fake_mapping_data_.GetData();
     uint32_t vmap_table_offset = sizeof(OatQuickMethodHeader) + fake_vmap_table_data.size();
     uint32_t mapping_table_offset = vmap_table_offset + fake_mapping_data.size();
-    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
+    uint32_t gc_map_offset = mapping_table_offset + fake_gc_map_.size();
+    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
                                        4 * sizeof(void*), 0u, 0u, code_size);
     fake_header_code_and_maps_.resize(sizeof(method_header));
     memcpy(&fake_header_code_and_maps_[0], &method_header, sizeof(method_header));
@@ -84,23 +85,23 @@
                                       fake_vmap_table_data.begin(), fake_vmap_table_data.end());
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
                                       fake_mapping_data.begin(), fake_mapping_data.end());
+    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
+                                      fake_gc_map_.begin(), fake_gc_map_.end());
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
                                       fake_code_.begin(), fake_code_.end());
 
     // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
     // adjustment will be a NOP, see ArtMethod::EntryPointToCodePointer().
     CHECK_EQ(mapping_table_offset & 1u, 0u);
-    const uint8_t* code_ptr = &fake_header_code_and_maps_[mapping_table_offset];
+    const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
     ASSERT_TRUE(method_f_ != nullptr);
     method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
-    method_f_->SetNativeGcMap(&fake_gc_map_[0]);
 
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
     ASSERT_TRUE(method_g_ != nullptr);
     method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
-    method_g_->SetNativeGcMap(&fake_gc_map_[0]);
   }
 
   const DexFile* dex_;
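
Since every insert(begin(), ...) above prepends, the finished test buffer is ordered gc map, mapping table, vmap table, header, code; gc_map_offset (the header plus all three tables) is therefore exactly where the code starts, which is why code_ptr now indexes the buffer at gc_map_offset. Sketched:

// fake_header_code_and_maps_ after the inserts above:
//
//   offset 0                                                gc_map_offset
//   | gc map | mapping table | vmap table | method header | code ... |
//
// Each *_offset_ stored in the header is that table's distance back from
// the start of the code.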
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index b936511..62ef6f8 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -252,14 +252,23 @@
   return CodeInfo(data);
 }
 
-inline void ArtMethod::SetOatNativeGcMapOffset(uint32_t gc_map_offset) {
-  DCHECK(!Runtime::Current()->IsStarted());
-  SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
+inline const uint8_t* ArtMethod::GetNativeGcMap(size_t pointer_size) {
+  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetNativeGcMap(code_pointer, pointer_size);
 }
 
-inline uint32_t ArtMethod::GetOatNativeGcMapOffset() {
-  DCHECK(!Runtime::Current()->IsStarted());
-  return PointerToLowMemUInt32(GetNativeGcMap());
+inline const uint8_t* ArtMethod::GetNativeGcMap(const void* code_pointer, size_t pointer_size) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].gc_map_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
 }
 
 inline bool ArtMethod::IsRuntimeMethod() {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index d292552..f5c1454 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -152,7 +152,7 @@
     // (as in stack_map.h).
     return GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
         && GetQuickOatCodePointer(pointer_size) != nullptr
-        && GetNativeGcMapPtrSize(pointer_size) == nullptr;
+        && GetNativeGcMap(pointer_size) == nullptr;
   }
 
   bool IsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -405,29 +405,11 @@
   StackMap GetStackMap(uint32_t native_pc_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   CodeInfo GetOptimizedCodeInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CheckObjectSizeEqualsMirrorSize();
-    return GetNativeGcMapPtrSize(sizeof(void*));
-  }
-  ALWAYS_INLINE const uint8_t* GetNativeGcMapPtrSize(size_t pointer_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtrWithSize<uint8_t*>(GcMapOffset(pointer_size), pointer_size);
-  }
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetNativeGcMap(const uint8_t* data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CheckObjectSizeEqualsMirrorSize();
-    SetNativeGcMapPtrSize(data, sizeof(void*));
-  }
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE void SetNativeGcMapPtrSize(const uint8_t* data, size_t pointer_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetFieldPtrWithSize<false, true, kVerifyFlags>(GcMapOffset(pointer_size), data,
-                                                   pointer_size);
-  }
-
-  // When building the oat need a convenient place to stuff the offset of the native GC map.
-  void SetOatNativeGcMapOffset(uint32_t gc_map_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint32_t GetOatNativeGcMapOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
+  const uint8_t* GetNativeGcMap(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetNativeGcMap(const void* code_pointer, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kCheckFrameSize = true>
   uint32_t GetFrameSizeInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -477,11 +459,6 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  static MemberOffset GcMapOffset(size_t pointer_size) {
-    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
-        PtrSizedFields, gc_map_) / sizeof(void*) * pointer_size);
-  }
-
   void* GetEntryPointFromJni() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CheckObjectSizeEqualsMirrorSize();
     return GetEntryPointFromJniPtrSize(sizeof(void*));
@@ -660,11 +637,6 @@
     // portable compiled code or the interpreter.
     void* entry_point_from_quick_compiled_code_;
 
-    // Pointer to a data structure created by the compiler and used by the garbage collector to
-    // determine which registers hold live references to objects within the heap. Keyed by native PC
-    // offsets for the quick compiler and dex PCs for the portable.
-    void* gc_map_;
-
     // Method dispatch from portable compiled code invokes this pointer which may cause bridging
     // into quick compiled code or the interpreter. Last to simplify entrypoint logic.
     void* entry_point_from_portable_compiled_code_;
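
The replacement accessors above push callers toward a two-step pattern: fetch the raw map bytes for the current pointer size, then decode them per dex pc. A hedged sketch of that caller-side shape, mirroring the thread.cc call sites later in this change (FindRegBitmap is an illustrative helper, not ART API):

// Returns the per-register reference bitmap for dex_pc, or nullptr when the
// method has no native GC map.
static const uint8_t* FindRegBitmap(mirror::ArtMethod* m, uint32_t dex_pc)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  const uint8_t* gc_map = m->GetNativeGcMap(sizeof(void*));
  if (gc_map == nullptr) {
    return nullptr;
  }
  verifier::DexPcToReferenceMap dex_gc_map(gc_map);
  return dex_gc_map.FindBitMap(dex_pc);
}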
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 420e9df..760eb9b 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -118,14 +118,12 @@
 
 static void Thread_nativeSetName(JNIEnv* env, jobject peer, jstring java_name) {
   ScopedUtfChars name(env, java_name);
-  Thread* self;
   {
     ScopedObjectAccess soa(env);
     if (soa.Decode<mirror::Object*>(peer) == soa.Self()->GetPeer()) {
       soa.Self()->SetThreadName(name.c_str());
       return;
     }
-    self = soa.Self();
   }
   // Suspend thread to avoid it from killing itself while we set its name. We don't just hold the
   // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
diff --git a/runtime/oat.cc b/runtime/oat.cc
index bfb27dd..6455783 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '4', '8', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '4', '9', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
@@ -493,35 +493,19 @@
   key_value_store_size_ = data_ptr - reinterpret_cast<char*>(&key_value_store_);
 }
 
-OatMethodOffsets::OatMethodOffsets()
-  : code_offset_(0),
-    gc_map_offset_(0)
-{}
-
-OatMethodOffsets::OatMethodOffsets(uint32_t code_offset,
-                                   uint32_t gc_map_offset
-                                   )
-  : code_offset_(code_offset),
-    gc_map_offset_(gc_map_offset)
-{}
+OatMethodOffsets::OatMethodOffsets(uint32_t code_offset) : code_offset_(code_offset) {
+}
 
 OatMethodOffsets::~OatMethodOffsets() {}
 
-OatQuickMethodHeader::OatQuickMethodHeader()
-  : mapping_table_offset_(0),
-    vmap_table_offset_(0),
-    frame_info_(0, 0, 0),
-    code_size_(0)
-{}
-
 OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t mapping_table_offset, uint32_t vmap_table_offset, uint32_t frame_size_in_bytes,
-    uint32_t core_spill_mask, uint32_t fp_spill_mask, uint32_t code_size)
-  : mapping_table_offset_(mapping_table_offset),
-    vmap_table_offset_(vmap_table_offset),
-    frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
-    code_size_(code_size)
-{}
+    uint32_t mapping_table_offset, uint32_t vmap_table_offset, uint32_t gc_map_offset,
+    uint32_t frame_size_in_bytes, uint32_t core_spill_mask, uint32_t fp_spill_mask,
+    uint32_t code_size)
+    : mapping_table_offset_(mapping_table_offset), vmap_table_offset_(vmap_table_offset),
+      gc_map_offset_(gc_map_offset),
+      frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask), code_size_(code_size) {
+}
 
 OatQuickMethodHeader::~OatQuickMethodHeader() {}
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 8fb02b8..11ed4fb 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -164,25 +164,20 @@
 
 class PACKED(4) OatMethodOffsets {
  public:
-  OatMethodOffsets();
-
-  OatMethodOffsets(uint32_t code_offset,
-                   uint32_t gc_map_offset);
+  OatMethodOffsets(uint32_t code_offset = 0);
 
   ~OatMethodOffsets();
 
   uint32_t code_offset_;
-  uint32_t gc_map_offset_;
 };
 
 // OatQuickMethodHeader precedes the raw code chunk generated by the Quick compiler.
 class PACKED(4) OatQuickMethodHeader {
  public:
-  OatQuickMethodHeader();
-
-  explicit OatQuickMethodHeader(uint32_t mapping_table_offset, uint32_t vmap_table_offset,
-                                uint32_t frame_size_in_bytes, uint32_t core_spill_mask,
-                                uint32_t fp_spill_mask, uint32_t code_size);
+  OatQuickMethodHeader(uint32_t mapping_table_offset = 0U, uint32_t vmap_table_offset = 0U,
+                       uint32_t gc_map_offset = 0U, uint32_t frame_size_in_bytes = 0U,
+                       uint32_t core_spill_mask = 0U, uint32_t fp_spill_mask = 0U,
+                       uint32_t code_size = 0U);
 
   ~OatQuickMethodHeader();
 
@@ -190,6 +185,8 @@
   uint32_t mapping_table_offset_;
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
+  // The offset in bytes from the start of the gc map to the end of the header.
+  uint32_t gc_map_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
   // The code size in bytes.
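
Adding gc_map_offset_ changes the on-disk header layout, which is what forces the oat version bump from '048' to '049' earlier in this change. The resulting field order, as a flattened sketch of the declarations above:

#include <cstdint>

// Sketch of the quick method header that sits immediately before each
// method's compiled code; the table offsets count backwards from the start
// of the code (QuickMethodFrameInfo is flattened into three words here).
struct OatQuickMethodHeaderSketch {
  uint32_t mapping_table_offset_;
  uint32_t vmap_table_offset_;
  uint32_t gc_map_offset_;  // New in version 049; 0 means "no gc map".
  uint32_t frame_size_in_bytes_;
  uint32_t core_spill_mask_;
  uint32_t fp_spill_mask_;
  uint32_t code_size_;
};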
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 6237767..a429c87 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -78,6 +78,31 @@
   return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
 }
 
+const uint8_t* OatFile::OatMethod::GetGcMap() const {
+  const void* code = mirror::ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  if (code == nullptr) {
+    return nullptr;
+  }
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].gc_map_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code) - offset;
+}
+
+uint32_t OatFile::OatMethod::GetGcMapOffset() const {
+  const uint8_t* gc_map = GetGcMap();
+  return static_cast<uint32_t>(gc_map != nullptr ? gc_map - begin_ : 0u);
+}
+
+uint32_t OatFile::OatMethod::GetGcMapOffsetOffset() const {
+  const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
+  if (method_header == nullptr) {
+    return 0u;
+  }
+  return reinterpret_cast<const uint8_t*>(&method_header->gc_map_offset_) - begin_;
+}
+
 inline uint32_t OatFile::OatMethod::GetMappingTableOffset() const {
   const uint8_t* mapping_table = GetMappingTable();
   return static_cast<uint32_t>(mapping_table != nullptr ? mapping_table - begin_ : 0u);
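
GetGcMap() above reuses the idiom of the frame-info accessors earlier in this file: the method header lies immediately below the code, so indexing [-1] off the code pointer addresses it. Distilled, with a single-field stand-in for the header (the real code indexes the full OatQuickMethodHeader):

#include <cstdint>

struct HeaderTail {  // Stand-in: pretend the header ends with this field.
  uint32_t gc_map_offset_;
};

// An offset of 0 encodes "this method has no gc map"; otherwise the map
// starts offset bytes below the first instruction.
inline const uint8_t* GcMapOf(const void* code) {
  uint32_t offset = reinterpret_cast<const HeaderTail*>(code)[-1].gc_map_offset_;
  return offset != 0u ? reinterpret_cast<const uint8_t*>(code) - offset : nullptr;
}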
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 54f5eab..91e571b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -577,18 +577,15 @@
 const OatFile::OatMethod OatFile::OatClass::GetOatMethod(uint32_t method_index) const {
   const OatMethodOffsets* oat_method_offsets = GetOatMethodOffsets(method_index);
   if (oat_method_offsets == nullptr) {
-    return OatMethod(nullptr, 0, 0);
+    return OatMethod(nullptr, 0);
   }
   if (oat_file_->IsExecutable() ||
       Runtime::Current() == nullptr ||        // This case applies for oatdump.
       Runtime::Current()->IsCompiler()) {
-    return OatMethod(
-        oat_file_->Begin(),
-        oat_method_offsets->code_offset_,
-        oat_method_offsets->gc_map_offset_);
+    return OatMethod(oat_file_->Begin(), oat_method_offsets->code_offset_);
   } else {
     // We aren't allowed to use the compiled code. We just force it down the interpreted version.
-    return OatMethod(oat_file_->Begin(), 0, 0);
+    return OatMethod(oat_file_->Begin(), 0);
   }
 }
 
@@ -596,7 +593,6 @@
   CHECK(method != NULL);
   method->SetEntryPointFromPortableCompiledCode(GetPortableCode());
   method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
-  method->SetNativeGcMap(GetNativeGcMap());  // Used by native methods in work around JNI mode.
 }
 
 bool OatFile::IsPic() const {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 2b94249..a335c94 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -96,9 +96,6 @@
     uint32_t GetCodeOffset() const {
       return code_offset_;
     }
-    uint32_t GetNativeGcMapOffset() const {
-      return native_gc_map_offset_;
-    }
 
     const void* GetPortableCode() const {
       // TODO: encode whether code is portable/quick in flags within OatMethod.
@@ -134,10 +131,6 @@
     const OatQuickMethodHeader* GetOatQuickMethodHeader() const;
     uint32_t GetOatQuickMethodHeaderOffset() const;
 
-    const uint8_t* GetNativeGcMap() const {
-      return GetOatPointer<const uint8_t*>(native_gc_map_offset_);
-    }
-
     size_t GetFrameSizeInBytes() const;
     uint32_t GetCoreSpillMask() const;
     uint32_t GetFpSpillMask() const;
@@ -150,18 +143,20 @@
     uint32_t GetVmapTableOffset() const;
     uint32_t GetVmapTableOffsetOffset() const;
 
+    const uint8_t* GetGcMap() const;
+    uint32_t GetGcMapOffset() const;
+    uint32_t GetGcMapOffsetOffset() const;
+
     // Create an OatMethod with offsets relative to the given base address
-    OatMethod(const uint8_t* base, const uint32_t code_offset, const uint32_t gc_map_offset)
-      : begin_(base),
-        code_offset_(code_offset),
-        native_gc_map_offset_(gc_map_offset) {
+    OatMethod(const uint8_t* base, const uint32_t code_offset)
+        : begin_(base), code_offset_(code_offset) {
     }
     ~OatMethod() {}
 
     // A representation of an invalid OatMethod, used when an OatMethod or OatClass can't be found.
     // See ClassLinker::FindOatMethodFor.
     static const OatMethod Invalid() {
-      return OatMethod(nullptr, -1, -1);
+      return OatMethod(nullptr, -1);
     }
 
    private:
@@ -174,9 +169,7 @@
     }
 
     const uint8_t* const begin_;
-
     const uint32_t code_offset_;
-    const uint32_t native_gc_map_offset_;
 
     friend class OatClass;
   };
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index a8d4308..72b696b 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -106,9 +106,7 @@
 };
 
 struct InlineIGetIPutData {
-  // The op_variant below is opcode-Instruction::IGET for IGETs and
-  // opcode-Instruction::IPUT for IPUTs. This is because the runtime
-  // doesn't know the OpSize enumeration.
+  // The op_variant below is DexMemAccessType but the runtime doesn't know that enumeration.
   uint16_t op_variant : 3;
   uint16_t method_is_static : 1;
   uint16_t object_arg : 4;
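
The reworded comment above points at a common packing pattern: the struct stores what is logically an enum (DexMemAccessType) in a 3-bit field as a raw integer, because the declaring code cannot see the enum's definition. A hedged sketch of the round-trip (the enumerator names are illustrative):

#include <cstdint>

enum DexMemAccessType : uint16_t {  // Illustrative stand-in values.
  kDexMemAccessWord,
  kDexMemAccessWide,
  kDexMemAccessObject,
};

struct PackedData {
  uint16_t op_variant : 3;  // Holds a DexMemAccessType without naming it.
};

inline DexMemAccessType GetAccessType(const PackedData& d) {
  return static_cast<DexMemAccessType>(d.op_variant);
}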
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 163c11d..f7c7106 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2102,7 +2102,7 @@
     } else {
       // Java method.
       // Portable path use DexGcMap and store in Method.native_gc_map_.
-      const uint8_t* gc_map = m->GetNativeGcMap();
+      const uint8_t* gc_map = m->GetNativeGcMap(sizeof(void*));
       CHECK(gc_map != nullptr) << PrettyMethod(m);
       verifier::DexPcToReferenceMap dex_gc_map(gc_map);
       uint32_t dex_pc = shadow_frame->GetDexPC();
@@ -2157,7 +2157,7 @@
           }
         }
       } else {
-        const uint8_t* native_gc_map = m->GetNativeGcMap();
+        const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
         CHECK(native_gc_map != nullptr) << PrettyMethod(m);
         const DexFile::CodeItem* code_item = m->GetCodeItem();
         // Can't be nullptr or how would we compile its instructions?
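
Both updated call sites decode the returned map the same way: look up the bitmap for a dex pc, then test one bit per dex register. The bit test itself is plain bit arithmetic; a sketch of the convention these call sites rely on:

#include <cstddef>
#include <cstdint>

// Register `reg` holds a live object reference iff its bit is set:
// bit (reg % 8) of byte (reg / 8), least-significant bit first.
inline bool IsReferenceRegister(const uint8_t* reg_bitmap, size_t reg) {
  return ((reg_bitmap[reg / 8] >> (reg % 8)) & 1u) != 0;
}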
diff --git a/test/431-optimizing-arith-shifts/expected.txt b/test/431-optimizing-arith-shifts/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/expected.txt
diff --git a/test/431-optimizing-arith-shifts/info.txt b/test/431-optimizing-arith-shifts/info.txt
new file mode 100644
index 0000000..14ff264
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/info.txt
@@ -0,0 +1 @@
+Tests for shift operations.
diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java
new file mode 100644
index 0000000..d8667c6
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/src/Main.java
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    shlInt();
+    shlLong();
+    shrInt();
+    shrLong();
+    ushrInt();
+    ushrLong();
+  }
+
+  private static void shlInt() {
+    expectEquals(48, $opt$ShlConst2(12));
+    expectEquals(12, $opt$ShlConst0(12));
+    expectEquals(-48, $opt$Shl(-12, 2));
+    expectEquals(1024, $opt$Shl(32, 5));
+
+    expectEquals(7, $opt$Shl(7, 0));
+    expectEquals(14, $opt$Shl(7, 1));
+    expectEquals(0, $opt$Shl(0, 30));
+
+    expectEquals(1073741824L, $opt$Shl(1, 30));
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, 31));  // overflow
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1));  // overflow
+    expectEquals(1073741824, $opt$Shl(268435456, 2));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shl(7, 32));  // 32 & 0x1f = 0
+    expectEquals(14, $opt$Shl(7, 33));  // 33 & 0x1f = 1
+    expectEquals(32, $opt$Shl(1, 101));  // 101 & 0x1f = 5
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, -1));  // -1 & 0x1f = 31
+    expectEquals(14, $opt$Shl(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shl(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-536870912, $opt$Shl(7, -3));  // -3 & 0x1f = 29
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shl(7, Integer.MIN_VALUE));
+  }
+
+  private static void shlLong() {
+    expectEquals(48L, $opt$ShlConst2(12L));
+    expectEquals(12L, $opt$ShlConst0(12L));
+    expectEquals(-48L, $opt$Shl(-12L, 2L));
+    expectEquals(1024L, $opt$Shl(32L, 5L));
+
+    expectEquals(7L, $opt$Shl(7L, 0L));
+    expectEquals(14L, $opt$Shl(7L, 1L));
+    expectEquals(0L, $opt$Shl(0L, 30L));
+
+    expectEquals(1073741824L, $opt$Shl(1L, 30L));
+    expectEquals(2147483648L, $opt$Shl(1L, 31L));
+    expectEquals(2147483648L, $opt$Shl(1073741824L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(4294967296L, $opt$Shl(1L, 32L));
+    expectEquals(60129542144L, $opt$Shl(7L, 33L));
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, 63L));  // overflow
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shl(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(14L, $opt$Shl(7L, 65L));  // 65 & 0x3f = 1
+    expectEquals(137438953472L, $opt$Shl(1L, 101L));  // 101 & 0x3f = 37
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, -1L));  // -1 & 0x3f = 63
+    expectEquals(14L, $opt$Shl(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shl(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(2305843009213693952L, $opt$Shl(1L, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE));
+  }
+
+  private static void shrInt() {
+    expectEquals(3, $opt$ShrConst2(12));
+    expectEquals(12, $opt$ShrConst0(12));
+    expectEquals(-3, $opt$Shr(-12, 2));
+    expectEquals(1, $opt$Shr(32, 5));
+
+    expectEquals(7, $opt$Shr(7, 0));
+    expectEquals(3, $opt$Shr(7, 1));
+    expectEquals(0, $opt$Shr(0, 30));
+    expectEquals(0, $opt$Shr(1, 30));
+    expectEquals(-1, $opt$Shr(-1, 30));
+
+    expectEquals(0, $opt$Shr(Integer.MAX_VALUE, 31));
+    expectEquals(-1, $opt$Shr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$Shr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$Shr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$Shr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-4, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$Shr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shr(7, Integer.MIN_VALUE));
+  }
+
+  private static void shrLong() {
+    expectEquals(3L, $opt$ShrConst2(12L));
+    expectEquals(12L, $opt$ShrConst0(12L));
+    expectEquals(-3L, $opt$Shr(-12L, 2L));
+    expectEquals(1, $opt$Shr(32, 5));
+
+    expectEquals(7L, $opt$Shr(7L, 0L));
+    expectEquals(3L, $opt$Shr(7L, 1L));
+    expectEquals(0L, $opt$Shr(0L, 30L));
+    expectEquals(0L, $opt$Shr(1L, 30L));
+    expectEquals(-1L, $opt$Shr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$Shr(1073741824L, 30L));
+    expectEquals(1L, $opt$Shr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$Shr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$Shr(4294967296L, 32L));
+    expectEquals(7L, $opt$Shr(60129542144L, 33L));
+    expectEquals(0L, $opt$Shr(Long.MAX_VALUE, 63L));
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$Shr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$Shr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$Shr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(-4L, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0L, $opt$Shr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shr(7L, Long.MIN_VALUE));
+  }
+
+  private static void ushrInt() {
+    expectEquals(3, $opt$UShrConst2(12));
+    expectEquals(12, $opt$UShrConst0(12));
+    expectEquals(1073741821, $opt$UShr(-12, 2));
+    expectEquals(1, $opt$UShr(32, 5));
+
+    expectEquals(7, $opt$UShr(7, 0));
+    expectEquals(3, $opt$UShr(7, 1));
+    expectEquals(0, $opt$UShr(0, 30));
+    expectEquals(0, $opt$UShr(1, 30));
+    expectEquals(3, $opt$UShr(-1, 30));
+
+    expectEquals(0, $opt$UShr(Integer.MAX_VALUE, 31));
+    expectEquals(1, $opt$UShr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$UShr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$UShr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$UShr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$UShr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$UShr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(4, $opt$UShr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$UShr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$UShr(7, Integer.MIN_VALUE));
+  }
+
+  private static void ushrLong() {
+    expectEquals(3L, $opt$UShrConst2(12L));
+    expectEquals(12L, $opt$UShrConst0(12L));
+    expectEquals(4611686018427387901L, $opt$UShr(-12L, 2L));
+    expectEquals(1, $opt$UShr(32, 5));
+
+    expectEquals(7L, $opt$UShr(7L, 0L));
+    expectEquals(3L, $opt$UShr(7L, 1L));
+    expectEquals(0L, $opt$UShr(0L, 30L));
+    expectEquals(0L, $opt$UShr(1L, 30L));
+    expectEquals(17179869183L, $opt$UShr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$UShr(1073741824L, 30L));
+    expectEquals(1L, $opt$UShr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$UShr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$UShr(4294967296L, 32L));
+    expectEquals(7L, $opt$UShr(60129542144L, 33L));
+    expectEquals(0L, $opt$UShr(Long.MAX_VALUE, 63L));
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$UShr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$UShr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$UShr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$UShr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$UShr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(4L, $opt$UShr(Long.MIN_VALUE, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(0L, $opt$UShr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$UShr(7L, Long.MIN_VALUE));
+  }
+
+  static int $opt$Shl(int a, int b) {
+    return a << b;
+  }
+
+  static long $opt$Shl(long a, long b) {
+    return a << b;
+  }
+
+  static int $opt$Shr(int a, int b) {
+    return a >> b;
+  }
+
+  static long $opt$Shr(long a, long b) {
+    return a >> b;
+  }
+
+  static int $opt$UShr(int a, int b) {
+    return a >>> b;
+  }
+
+  static long $opt$UShr(long a, long b) {
+    return a >>> b;
+  }
+
+  static int $opt$ShlConst2(int a) {
+    return a << 2;
+  }
+
+  static long $opt$ShlConst2(long a) {
+    return a << 2L;
+  }
+
+  static int $opt$ShrConst2(int a) {
+    return a >> 2;
+  }
+
+  static long $opt$ShrConst2(long a) {
+    return a >> 2L;
+  }
+
+  static int $opt$UShrConst2(int a) {
+    return a >>> 2;
+  }
+
+  static long $opt$UShrConst2(long a) {
+    return a >>> 2L;
+  }
+
+  static int $opt$ShlConst0(int a) {
+    return a << 0;
+  }
+
+  static long $opt$ShlConst0(long a) {
+    return a << 0L;
+  }
+
+  static int $opt$ShrConst0(int a) {
+    return a >> 0;
+  }
+
+  static long $opt$ShrConst0(long a) {
+    return a >> 0L;
+  }
+
+  static int $opt$UShrConst0(int a) {
+    return a >>> 0;
+  }
+
+  static long $opt$UShrConst0(long a) {
+    return a >>> 0L;
+  }
+
+}
+
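
The & 0x1f / & 0x3f comments in the test above spell out the contract the compiler must honor: Java masks the shift distance to the low 5 bits for ints and the low 6 bits for longs. In C++, where shifting by the full bit width is undefined, a back end realizes this by masking explicitly. A compact sketch (illustrative helpers, not ART code):

#include <cstdint>

// Java shift semantics expressed in C++: mask the distance first, and go
// through unsigned types where C++'s signed shifts would not match
// (>>> has no direct C++ equivalent for negative operands).
int32_t ShlInt(int32_t a, int32_t b) {
  return static_cast<int32_t>(static_cast<uint32_t>(a) << (b & 0x1f));
}
int32_t ShrInt(int32_t a, int32_t b) {
  return a >> (b & 0x1f);  // Arithmetic shift, sign-extending.
}
int32_t UshrInt(int32_t a, int32_t b) {
  return static_cast<int32_t>(static_cast<uint32_t>(a) >> (b & 0x1f));
}
int64_t ShlLong(int64_t a, int64_t b) {
  return static_cast<int64_t>(static_cast<uint64_t>(a) << (b & 0x3f));
}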
diff --git a/test/431-type-propagation/expected.txt b/test/431-type-propagation/expected.txt
new file mode 100644
index 0000000..ccaf6f8
--- /dev/null
+++ b/test/431-type-propagation/expected.txt
@@ -0,0 +1 @@
+Enter
diff --git a/test/431-type-propagation/info.txt b/test/431-type-propagation/info.txt
new file mode 100644
index 0000000..b895e91
--- /dev/null
+++ b/test/431-type-propagation/info.txt
@@ -0,0 +1,2 @@
+Regression test for the SSA building of the optimizing
+compiler. See comment in smali file.
diff --git a/test/431-type-propagation/smali/TypePropagation.smali b/test/431-type-propagation/smali/TypePropagation.smali
new file mode 100644
index 0000000..817f0c5
--- /dev/null
+++ b/test/431-type-propagation/smali/TypePropagation.smali
@@ -0,0 +1,43 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTypePropagation;
+
+.super Ljava/lang/Object;
+
+.method public static method([I)V
+   .registers 3
+   const/4 v0, 0
+   aget v1, v2, v0
+   add-int v2, v1, v0
+   if-eq v1, v0, :end
+   # Putting a float in v1 will lead to the creation of a phi with one
+   # float input and one integer input. Since the SSA builder trusts
+   # the verifier, it assumes that the integer input must be converted
+   # to float. However, since v0 is not used afterwards, the verifier
+   # hasn't ensured that. Therefore, the compiler must remove
+   # the phi prior to doing type propagation.
+   int-to-float v1, v0
+   :end
+   # Do a call to create an environment that will capture all Dex registers.
+   # This environment is the reason why a phi is created at the join block
+   # of the if.
+   invoke-static {}, LTypePropagation;->emptyMethod()V
+   return-void
+.end method
+
+.method public static emptyMethod()V
+   .registers 0
+   return-void
+.end method
diff --git a/test/431-type-propagation/src/Main.java b/test/431-type-propagation/src/Main.java
new file mode 100644
index 0000000..91dfe10
--- /dev/null
+++ b/test/431-type-propagation/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.out.println("Enter");
+    Class<?> c = Class.forName("TypePropagation");
+    Method m = c.getMethod("method", int[].class);
+    int[] array = new int[7];
+    Object[] arguments = { array };
+    m.invoke(null, arguments);
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 7674a8a..7ec3168 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -1,3 +1,4 @@
+PackedSwitch
 b/17790197
 b/17978759
 FloatBadArgReg
diff --git a/test/800-smali/smali/PackedSwitch.smali b/test/800-smali/smali/PackedSwitch.smali
new file mode 100644
index 0000000..6a3e5f0
--- /dev/null
+++ b/test/800-smali/smali/PackedSwitch.smali
@@ -0,0 +1,26 @@
+.class public LPackedSwitch;
+
+.super Ljava/lang/Object;
+
+.method public static packedSwitch(I)I
+    .registers 2
+
+    const/4 v0, 0
+    packed-switch v0, :switch_data
+    goto :default
+
+    :switch_data
+    .packed-switch 0x0
+        :case
+    .end packed-switch
+
+    :return
+    return v1
+
+    :default
+    goto :return
+
+    :case
+    goto :return
+
+.end method
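
For readers who do not speak smali: the method above switches on the constant 0 through a single-entry jump table, and both the matched case and the default path fall through to returning the argument unchanged (hence the expected result 123 registered in Main.java below). A rough reconstruction (illustrative, in C++ for consistency with the sketches above):

// Rough equivalent of LPackedSwitch;->packedSwitch(I)I.
int packedSwitch(int v1) {
  const int v0 = 0;
  switch (v0) {      // packed-switch over a one-entry table starting at 0x0.
    case 0: break;   // :case -- jumps straight to the return.
    default: break;  // :default -- likewise.
  }
  return v1;         // The argument comes back unchanged.
}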
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 8d318c3..abb53de 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -49,6 +49,8 @@
     public Main() {
         // Create the test cases.
         testCases = new LinkedList<TestCase>();
+        testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch",
+          new Object[]{123}, null, 123));
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
         testCases.add(new TestCase("b/17978759", "B17978759", "test", null, new VerifyError(), null));
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 29da2f6..47d186a 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -306,6 +306,7 @@
   010-instance \
   012-math \
   023-many-interfaces \
+  027-arithmetic \
   037-inherit \
   044-proxy \
   045-reflect-array \
@@ -329,6 +330,7 @@
   427-bounds \
   428-optimizing-arith-rem \
   430-live-register-slow-path \
+  431-optimizing-arith-shifts \
   701-easy-div-rem \
   800-smali \