Merge "Add interface for updating process state."
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index 74f36dd..64e5fa6 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -37,6 +37,7 @@
   BasicBlock* res = NULL;
   if ((idx_ >= end_idx_) && changed_) {
     idx_ = start_idx_;
+    repeats_++;
     changed_ = false;
   }
   if (idx_ < end_idx_) {
@@ -62,6 +63,7 @@
   BasicBlock* res = NULL;
   if ((idx_ < 0) && changed_) {
     idx_ = start_idx_;
+    repeats_++;
     changed_ = false;
   }
   if (idx_ >= 0) {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 26e3665..a0c1c12 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -37,6 +37,7 @@
   class DataflowIterator {
     public:
       virtual ~DataflowIterator() {}
+      int32_t GetRepeatCount() { return repeats_; }
 
     protected:
       DataflowIterator(MIRGraph* mir_graph, int32_t start_idx, int32_t end_idx)
@@ -45,6 +46,7 @@
             end_idx_(end_idx),
             block_id_list_(NULL),
             idx_(0),
+            repeats_(0),
             changed_(false) {}
 
       virtual BasicBlock* ForwardSingleNext() ALWAYS_INLINE;
@@ -52,11 +54,13 @@
       virtual BasicBlock* ForwardRepeatNext(bool had_change) ALWAYS_INLINE;
       virtual BasicBlock* ReverseRepeatNext(bool had_change) ALWAYS_INLINE;
 
+
       MIRGraph* const mir_graph_;
       const int32_t start_idx_;
       const int32_t end_idx_;
       GrowableArray<BasicBlockId>* block_id_list_;
       int32_t idx_;
+      int32_t repeats_;
       bool changed_;
   };  // DataflowIterator
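A hedged usage sketch for the new counter; the driver loop below is illustrative, and only the iterator API (Next(bool), GetRepeatCount()) comes from this change:

    // Repeating iterators re-walk the block list until a pass reports no
    // change; repeats_ counts those full walks.
    RepeatingPreOrderDfsIterator iter(mir_graph);
    bool change = false;
    for (BasicBlock* bb = iter.Next(change); bb != NULL; bb = iter.Next(change)) {
      change = RunPassOnBlock(bb);  // RunPassOnBlock is hypothetical.
    }
    int32_t full_passes = iter.GetRepeatCount();  // e.g. feed into pass statistics.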
 
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index e53d636..197bba5 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -253,15 +253,15 @@
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
+  cu.NewTimingSplit("MIROpt:CheckFilters");
 #if !defined(ART_USE_PORTABLE_COMPILER)
   if (cu.mir_graph->SkipCompilation(Runtime::Current()->GetCompilerFilter())) {
     return NULL;
   }
 #endif
 
-  cu.NewTimingSplit("MIROpt:CodeLayout");
-
   /* Do a code layout pass */
+  cu.NewTimingSplit("MIROpt:CodeLayout");
   cu.mir_graph->CodeLayout();
 
   /* Perform SSA transformation for the whole method */
@@ -272,18 +272,23 @@
   cu.NewTimingSplit("MIROpt:ConstantProp");
   cu.mir_graph->PropagateConstants();
 
+  cu.NewTimingSplit("MIROpt:InitRegLoc");
+  cu.mir_graph->InitRegLocations();
+
   /* Count uses */
+  cu.NewTimingSplit("MIROpt:UseCount");
   cu.mir_graph->MethodUseCount();
 
-  /* Perform null check elimination */
-  cu.NewTimingSplit("MIROpt:NullCheckElimination");
-  cu.mir_graph->NullCheckElimination();
+  /* Perform null check elimination and type inference. */
+  cu.NewTimingSplit("MIROpt:NCE_TypeInference");
+  cu.mir_graph->NullCheckEliminationAndTypeInference();
 
   /* Combine basic blocks where possible */
-  cu.NewTimingSplit("MIROpt:BBOpt");
+  cu.NewTimingSplit("MIROpt:BBCombine");
   cu.mir_graph->BasicBlockCombine();
 
   /* Do some basic block optimizations */
+  cu.NewTimingSplit("MIROpt:BBOpt");
   cu.mir_graph->BasicBlockOptimization();
 
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
@@ -294,8 +299,8 @@
     cu.mir_graph->ShowOpcodeStats();
   }
 
-  /* Set up regLocation[] array to describe values - one for each ssa_name. */
-  cu.mir_graph->BuildRegLocations();
+  /* Reassociate sreg names with original Dalvik vreg names. */
+  cu.mir_graph->RemapRegLocations();
 
   CompiledMethod* result = NULL;
 
@@ -323,8 +328,9 @@
 
   cu.cg->Materialize();
 
-  cu.NewTimingSplit("Cleanup");
+  cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
   result = cu.cg->GetCompiledMethod();
+  cu.NewTimingSplit("Cleanup");
 
   if (result) {
     VLOG(compiler) << "Compiled " << PrettyMethod(method_idx, dex_file);
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index d359ee2..728d48a 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -29,7 +29,7 @@
  * TODO - many optimization flags are incomplete - they will only limit the
  * scope of optimizations but will not cause mis-optimizations.
  */
-const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
+const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
   // 00 NOP
   DF_NOP,
 
@@ -235,88 +235,88 @@
   DF_NOP,
 
   // 44 AGET vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 45 AGET_WIDE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 46 AGET_OBJECT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_A | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 47 AGET_BOOLEAN vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 48 AGET_BYTE vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 49 AGET_CHAR vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4A AGET_SHORT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C,
+  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4B APUT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4C APUT_WIDE vAA, vBB, vCC
-  DF_UA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_3 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_3 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4D APUT_OBJECT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_A | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4E APUT_BOOLEAN vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 4F APUT_BYTE vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 50 APUT_CHAR vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 51 APUT_SHORT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C,
+  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN,
 
   // 52 IGET vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 53 IGET_WIDE vA, vB, field@CCCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 54 IGET_OBJECT vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B | DF_LVN,
 
   // 55 IGET_BOOLEAN vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 56 IGET_BYTE vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 57 IGET_CHAR vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 58 IGET_SHORT vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // 59 IPUT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // 5A IPUT_WIDE vA, vB, field@CCCC
-  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B,
+  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B | DF_LVN,
 
   // 5B IPUT_OBJECT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B | DF_LVN,
 
   // 5C IPUT_BOOLEAN vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // 5D IPUT_BYTE vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // 5E IPUT_CHAR vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // 5F IPUT_SHORT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // 60 SGET vAA, field@BBBB
   DF_DA | DF_UMS,
@@ -712,10 +712,10 @@
   DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
   // E3 IGET_VOLATILE
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // E4 IPUT_VOLATILE
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN,
 
   // E5 SGET_VOLATILE
   DF_DA | DF_UMS,
@@ -724,13 +724,13 @@
   DF_UA | DF_UMS,
 
   // E7 IGET_OBJECT_VOLATILE
-  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B | DF_LVN,
 
   // E8 IGET_WIDE_VOLATILE
-  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B,
+  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN,
 
   // E9 IPUT_WIDE_VOLATILE
-  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B,
+  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B | DF_LVN,
 
   // EA SGET_WIDE_VOLATILE
   DF_DA | DF_A_WIDE | DF_UMS,
@@ -757,22 +757,22 @@
   DF_NOP,
 
   // F2 IGET_QUICK
-  DF_DA | DF_UB | DF_NULL_CHK_0,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_LVN,
 
   // F3 IGET_WIDE_QUICK
-  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0,
+  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_LVN,
 
   // F4 IGET_OBJECT_QUICK
-  DF_DA | DF_UB | DF_NULL_CHK_0,
+  DF_DA | DF_UB | DF_NULL_CHK_0 | DF_LVN,
 
   // F5 IPUT_QUICK
-  DF_UA | DF_UB | DF_NULL_CHK_1,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_LVN,
 
   // F6 IPUT_WIDE_QUICK
-  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2,
+  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_LVN,
 
   // F7 IPUT_OBJECT_QUICK
-  DF_UA | DF_UB | DF_NULL_CHK_1,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_LVN,
 
   // F8 INVOKE_VIRTUAL_QUICK
   DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
@@ -787,7 +787,7 @@
   DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
 
   // FC IPUT_OBJECT_VOLATILE
-  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B,
+  DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B | DF_LVN,
 
   // FD SGET_OBJECT_VOLATILE
   DF_DA | DF_REF_A | DF_UMS,
@@ -879,7 +879,7 @@
       new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapLiveIn);
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
     DecodedInstruction *d_insn = &mir->dalvikInsn;
 
     if (df_attributes & DF_HAS_USES) {
@@ -994,7 +994,7 @@
         static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation),
                                                               ArenaAllocator::kAllocDFInfo));
 
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
 
       // If not a pseudo-op, note non-leaf or can throw
     if (static_cast<int>(mir->dalvikInsn.opcode) <
@@ -1239,37 +1239,33 @@
   if (bb->block_type != kDalvikByteCode) {
     return false;
   }
+  // Each level of nesting adds *100 to count, up to 3 levels deep.
+  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+  uint32_t weight = std::max(1U, depth * 100);
   for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) {
     if (mir->ssa_rep == NULL) {
       continue;
     }
-    // Each level of nesting adds *100 to count, up to 3 levels deep.
-    uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
-    uint32_t weight = std::max(1U, depth * 100);
     for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
       int s_reg = mir->ssa_rep->uses[i];
       raw_use_counts_.Increment(s_reg);
       use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
-      int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
       // Implicit use of Method* ? */
       if (df_attributes & DF_UMS) {
         /*
          * Some invokes will not use Method* - need to perform test similar
          * to that found in GenInvoke() to decide whether to count refs
-         * for Method* on invoke-class opcodes.
-         * TODO: refactor for common test here, save results for GenInvoke
+         * for Method* on invoke-class opcodes.  This is a relatively expensive
+         * operation, so should only be done once.
+         * TODO: refactor InvokeUsesMethodStar() to perform check at parse time,
+         * and save results for both here and GenInvoke.  For now, go ahead
+         * and assume all invokes use method*.
          */
-        int uses_method_star = true;
-        if ((df_attributes & (DF_FORMAT_35C | DF_FORMAT_3RC)) &&
-            !(df_attributes & DF_NON_NULL_RET)) {
-          uses_method_star &= InvokeUsesMethodStar(mir);
-        }
-        if (uses_method_star) {
-          raw_use_counts_.Increment(method_sreg_);
-          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight);
-        }
+        raw_use_counts_.Increment(method_sreg_);
+        use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight);
       }
     }
   }
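Hoisting depth and weight out of the MIR loop is safe because both depend only on the block; a self-contained model of the weight computation (illustrative, not ART code):

    #include <algorithm>
    #include <cstdint>

    // weight(depth): 1 for straight-line code, then 100/200/300 for nesting
    // levels 1..3; deeper nesting is capped at 3.
    uint32_t UseWeight(uint32_t nesting_depth) {
      uint32_t depth = std::min(3U, nesting_depth);
      return std::max(1U, depth * 100);  // max() covers the depth == 0 case.
    }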
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index deaf2ff..2a18280 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -650,12 +650,16 @@
 
     int flags = Instruction::FlagsOf(insn->dalvikInsn.opcode);
 
-    int df_flags = oat_data_flow_attributes_[insn->dalvikInsn.opcode];
+    uint64_t df_flags = oat_data_flow_attributes_[insn->dalvikInsn.opcode];
 
     if (df_flags & DF_HAS_DEFS) {
       def_count_ += (df_flags & DF_A_WIDE) ? 2 : 1;
     }
 
+    if (df_flags & DF_LVN) {
+      cur_block->use_lvn = true;  // Run local value numbering on this basic block.
+    }
+
     // Check for inline data block signatures
     if (opcode == Instruction::NOP) {
       // A simple NOP will have a width of 1 at this point, embedded data NOP > 1.
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 8c20728..bffec39 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -92,41 +92,43 @@
   kRefB,
   kRefC,
   kUsesMethodStar,       // Implicit use of Method*.
+  kDoLVN,                // Worth computing local value numbers.
 };
 
-#define DF_NOP                  0
-#define DF_UA                   (1 << kUA)
-#define DF_UB                   (1 << kUB)
-#define DF_UC                   (1 << kUC)
-#define DF_A_WIDE               (1 << kAWide)
-#define DF_B_WIDE               (1 << kBWide)
-#define DF_C_WIDE               (1 << kCWide)
-#define DF_DA                   (1 << kDA)
-#define DF_IS_MOVE              (1 << kIsMove)
-#define DF_SETS_CONST           (1 << kSetsConst)
-#define DF_FORMAT_35C           (1 << kFormat35c)
-#define DF_FORMAT_3RC           (1 << kFormat3rc)
-#define DF_NULL_CHK_0           (1 << kNullCheckSrc0)
-#define DF_NULL_CHK_1           (1 << kNullCheckSrc1)
-#define DF_NULL_CHK_2           (1 << kNullCheckSrc2)
-#define DF_NULL_CHK_OUT0        (1 << kNullCheckOut0)
-#define DF_NON_NULL_DST         (1 << kDstNonNull)
-#define DF_NON_NULL_RET         (1 << kRetNonNull)
-#define DF_NULL_TRANSFER_0      (1 << kNullTransferSrc0)
-#define DF_NULL_TRANSFER_N      (1 << kNullTransferSrcN)
-#define DF_RANGE_CHK_1          (1 << kRangeCheckSrc1)
-#define DF_RANGE_CHK_2          (1 << kRangeCheckSrc2)
-#define DF_RANGE_CHK_3          (1 << kRangeCheckSrc3)
-#define DF_FP_A                 (1 << kFPA)
-#define DF_FP_B                 (1 << kFPB)
-#define DF_FP_C                 (1 << kFPC)
-#define DF_CORE_A               (1 << kCoreA)
-#define DF_CORE_B               (1 << kCoreB)
-#define DF_CORE_C               (1 << kCoreC)
-#define DF_REF_A                (1 << kRefA)
-#define DF_REF_B                (1 << kRefB)
-#define DF_REF_C                (1 << kRefC)
-#define DF_UMS                  (1 << kUsesMethodStar)
+#define DF_NOP                  0ULL
+#define DF_UA                   (1ULL << kUA)
+#define DF_UB                   (1ULL << kUB)
+#define DF_UC                   (1ULL << kUC)
+#define DF_A_WIDE               (1ULL << kAWide)
+#define DF_B_WIDE               (1ULL << kBWide)
+#define DF_C_WIDE               (1ULL << kCWide)
+#define DF_DA                   (1ULL << kDA)
+#define DF_IS_MOVE              (1ULL << kIsMove)
+#define DF_SETS_CONST           (1ULL << kSetsConst)
+#define DF_FORMAT_35C           (1ULL << kFormat35c)
+#define DF_FORMAT_3RC           (1ULL << kFormat3rc)
+#define DF_NULL_CHK_0           (1ULL << kNullCheckSrc0)
+#define DF_NULL_CHK_1           (1ULL << kNullCheckSrc1)
+#define DF_NULL_CHK_2           (1ULL << kNullCheckSrc2)
+#define DF_NULL_CHK_OUT0        (1ULL << kNullCheckOut0)
+#define DF_NON_NULL_DST         (1ULL << kDstNonNull)
+#define DF_NON_NULL_RET         (1ULL << kRetNonNull)
+#define DF_NULL_TRANSFER_0      (1ULL << kNullTransferSrc0)
+#define DF_NULL_TRANSFER_N      (1ULL << kNullTransferSrcN)
+#define DF_RANGE_CHK_1          (1ULL << kRangeCheckSrc1)
+#define DF_RANGE_CHK_2          (1ULL << kRangeCheckSrc2)
+#define DF_RANGE_CHK_3          (1ULL << kRangeCheckSrc3)
+#define DF_FP_A                 (1ULL << kFPA)
+#define DF_FP_B                 (1ULL << kFPB)
+#define DF_FP_C                 (1ULL << kFPC)
+#define DF_CORE_A               (1ULL << kCoreA)
+#define DF_CORE_B               (1ULL << kCoreB)
+#define DF_CORE_C               (1ULL << kCoreC)
+#define DF_REF_A                (1ULL << kRefA)
+#define DF_REF_B                (1ULL << kRefB)
+#define DF_REF_C                (1ULL << kRefC)
+#define DF_UMS                  (1ULL << kUsesMethodStar)
+#define DF_LVN                  (1ULL << kDoLVN)
 
 #define DF_HAS_USES             (DF_UA | DF_UB | DF_UC)
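The ULL suffixes matter because, assuming the enum starts at kUA = 0, kUsesMethodStar is bit 31 and the new kDoLVN is bit 32; a minimal self-contained check of the arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int kDoLVN = 32;                     // 33rd enumerator => bit 32.
      uint64_t df_lvn = UINT64_C(1) << kDoLVN;   // Well-defined in 64 bits.
      assert(df_lvn == UINT64_C(0x100000000));
      // With plain int, (1 << kDoLVN) shifts past the 32-bit width, which is
      // undefined behavior; hence the attribute table's move to uint64_t.
      return 0;
    }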
 
@@ -273,8 +275,9 @@
   bool catch_entry:1;
   bool explicit_throw:1;
   bool conditional_branch:1;
-  bool terminated_by_return:1;        // Block ends with a Dalvik return opcode.
-  bool dominates_return:1;            // Is a member of return extended basic block.
+  bool terminated_by_return:1;  // Block ends with a Dalvik return opcode.
+  bool dominates_return:1;      // Is a member of return extended basic block.
+  bool use_lvn:1;               // Run local value numbering on this block.
   MIR* first_mir_insn;
   MIR* last_mir_insn;
   BasicBlockDataFlow* data_flow_info;
@@ -451,7 +454,9 @@
 
   void DumpCFG(const char* dir_prefix, bool all_blocks);
 
-  void BuildRegLocations();
+  void InitRegLocations();
+
+  void RemapRegLocations();
 
   void DumpRegLocTable(RegLocation* table, int count);
 
@@ -619,7 +624,7 @@
   void MethodUseCount();
   void SSATransformation();
   void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb);
-  void NullCheckElimination();
+  void NullCheckEliminationAndTypeInference();
   /*
    * Type inference handling helpers.  Because Dalvik's bytecode is not fully typed,
    * we have to do some work to figure out the sreg type.  For some operations it is
@@ -675,7 +680,7 @@
   GrowableArray<CompilerTemp*> compiler_temps_;
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
-  static const int oat_data_flow_attributes_[kMirOpLast];
+  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
   static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst];
   static const uint32_t analysis_attributes_[kMirOpLast];
 
@@ -711,7 +716,7 @@
   bool FindLocalLiveIn(BasicBlock* bb);
   void ClearAllVisitedFlags();
   bool CountUses(struct BasicBlock* bb);
-  bool InferTypeAndSize(BasicBlock* bb);
+  bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
   bool VerifyPredInfo(BasicBlock* bb);
   BasicBlock* NeedsVisit(BasicBlock* bb);
   BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb);
@@ -727,7 +732,7 @@
   void SetConstantWide(int ssa_reg, int64_t value);
   int GetSSAUseCount(int s_reg);
   bool BasicBlockOpt(BasicBlock* bb);
-  bool EliminateNullChecks(BasicBlock* bb);
+  bool EliminateNullChecksAndInferTypes(BasicBlock* bb);
   void NullCheckEliminationInit(BasicBlock* bb);
   bool BuildExtendedBBList(struct BasicBlock* bb);
   bool FillDefBlockMatrix(BasicBlock* bb);
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index f5913a5..6353937 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -40,7 +40,7 @@
   MIR* mir;
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
 
     DecodedInstruction *d_insn = &mir->dalvikInsn;
 
@@ -155,7 +155,7 @@
       || (bb->block_type == kExitBlock));
   BasicBlock* bb_taken = GetBasicBlock(bb->taken);
   BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through);
-  if (((bb_taken != NULL) && (bb_fall_through == NULL)) &&
+  if (((bb_fall_through == NULL) && (bb_taken != NULL)) &&
       ((bb_taken->block_type == kDalvikByteCode) || (bb_taken->block_type == kExitBlock))) {
     // Follow simple unconditional branches.
     bb = bb_taken;
@@ -216,11 +216,17 @@
     return true;
   }
   int num_temps = 0;
-  LocalValueNumbering local_valnum(cu_);
+  bool use_lvn = bb->use_lvn;
+  UniquePtr<LocalValueNumbering> local_valnum;
+  if (use_lvn) {
+    local_valnum.reset(new LocalValueNumbering(cu_));
+  }
   while (bb != NULL) {
     for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
       // TUNING: use the returned value number for CSE.
-      local_valnum.GetValueNumber(mir);
+      if (use_lvn) {
+        local_valnum->GetValueNumber(mir);
+      }
       // Look for interesting opcodes, skip otherwise
       Instruction::Code opcode = mir->dalvikInsn.opcode;
       switch (opcode) {
@@ -463,7 +469,7 @@
         }
       }
     }
-    bb = NextDominatedBlock(bb);
+    bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
 
   if (num_temps > cu_->num_compiler_temps) {
@@ -486,7 +492,7 @@
       if (mir->ssa_rep == NULL) {
         continue;
       }
-      int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
       if (df_attributes & DF_HAS_NULL_CHKS) {
         checkstats_->null_checks++;
         if (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) {
@@ -571,7 +577,7 @@
     MIR* mir = bb->last_mir_insn;
     // Grab the attributes from the paired opcode
     MIR* throw_insn = mir->meta.throw_insn;
-    int df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
     bool can_combine = true;
     if (df_attributes & DF_HAS_NULL_CHKS) {
       can_combine &= ((throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0);
@@ -618,74 +624,87 @@
   return false;
 }
 
-/* Eliminate unnecessary null checks for a basic block. */
-bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) {
+/*
+ * Eliminate unnecessary null checks for a basic block. Also, while we're doing
+ * an iterative walk, go ahead and perform type and size inference.
+ */
+bool MIRGraph::EliminateNullChecksAndInferTypes(struct BasicBlock* bb) {
   if (bb->data_flow_info == NULL) return false;
+  bool infer_changed = false;
+  bool do_nce = ((cu_->disable_opt & (1 << kNullCheckElimination)) == 0);
 
-  /*
-   * Set initial state.  Be conservative with catch
-   * blocks and start with no assumptions about null check
-   * status (except for "this").
-   */
-  if ((bb->block_type == kEntryBlock) | bb->catch_entry) {
-    temp_ssa_register_v_->ClearAllBits();
-    // Assume all ins are objects.
-    for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins;
-         in_reg < cu_->num_dalvik_registers; in_reg++) {
-      temp_ssa_register_v_->SetBit(in_reg);
-    }
-    if ((cu_->access_flags & kAccStatic) == 0) {
-      // If non-static method, mark "this" as non-null
-      int this_reg = cu_->num_dalvik_registers - cu_->num_ins;
-      temp_ssa_register_v_->ClearBit(this_reg);
-    }
-  } else if (bb->predecessors->Size() == 1) {
-    BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0));
-    temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
-    if (pred_bb->block_type == kDalvikByteCode) {
-      // Check to see if predecessor had an explicit null-check.
-      MIR* last_insn = pred_bb->last_mir_insn;
-      Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
-      if (last_opcode == Instruction::IF_EQZ) {
-        if (pred_bb->fall_through == bb->id) {
-          // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
-          // it can't be null.
-          temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
-        }
-      } else if (last_opcode == Instruction::IF_NEZ) {
-        if (pred_bb->taken == bb->id) {
-          // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
-          // null.
-          temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+  if (do_nce) {
+    /*
+     * Set initial state.  Be conservative with catch
+     * blocks and start with no assumptions about null check
+     * status (except for "this").
+     */
+    if ((bb->block_type == kEntryBlock) || bb->catch_entry) {
+      temp_ssa_register_v_->ClearAllBits();
+      // Assume all ins are objects.
+      for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins;
+           in_reg < cu_->num_dalvik_registers; in_reg++) {
+        temp_ssa_register_v_->SetBit(in_reg);
+      }
+      if ((cu_->access_flags & kAccStatic) == 0) {
+        // If non-static method, mark "this" as non-null
+        int this_reg = cu_->num_dalvik_registers - cu_->num_ins;
+        temp_ssa_register_v_->ClearBit(this_reg);
+      }
+    } else if (bb->predecessors->Size() == 1) {
+      BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0));
+      temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
+      if (pred_bb->block_type == kDalvikByteCode) {
+        // Check to see if predecessor had an explicit null-check.
+        MIR* last_insn = pred_bb->last_mir_insn;
+        Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
+        if (last_opcode == Instruction::IF_EQZ) {
+          if (pred_bb->fall_through == bb->id) {
+            // On the fall-through from an IF_EQZ, clear the bit for the vA of the IF_EQZ to
+            // show that it can't be null.
+            temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+          }
+        } else if (last_opcode == Instruction::IF_NEZ) {
+          if (pred_bb->taken == bb->id) {
+            // On the taken branch of an IF_NEZ, clear the bit for the vA of the IF_NEZ to
+            // show that it can't be null.
+            temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+          }
         }
       }
-    }
-  } else {
-    // Starting state is union of all incoming arcs
-    GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
-    BasicBlock* pred_bb = GetBasicBlock(iter.Next());
-    DCHECK(pred_bb != NULL);
-    temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
-    while (true) {
-      pred_bb = GetBasicBlock(iter.Next());
-      if (!pred_bb) break;
-      if ((pred_bb->data_flow_info == NULL) ||
-          (pred_bb->data_flow_info->ending_null_check_v == NULL)) {
-        continue;
+    } else {
+      // Starting state is union of all incoming arcs
+      GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
+      BasicBlock* pred_bb = GetBasicBlock(iter.Next());
+      DCHECK(pred_bb != NULL);
+      temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
+      while (true) {
+        pred_bb = GetBasicBlock(iter.Next());
+        if (!pred_bb) break;
+        if ((pred_bb->data_flow_info == NULL) ||
+            (pred_bb->data_flow_info->ending_null_check_v == NULL)) {
+          continue;
+        }
+        temp_ssa_register_v_->Union(pred_bb->data_flow_info->ending_null_check_v);
       }
-      temp_ssa_register_v_->Union(pred_bb->data_flow_info->ending_null_check_v);
     }
+    // At this point, temp_ssa_register_v_ shows which sregs have an object definition with
+    // no intervening uses.
   }
 
-  // At this point, temp_ssa_register_v_ shows which sregs have an object definition with
-  // no intervening uses.
-
   // Walk through the instruction in the block, updating as necessary
   for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     if (mir->ssa_rep == NULL) {
         continue;
     }
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+
+    // Propagate type info.
+    infer_changed = InferTypeAndSize(bb, mir, infer_changed);
+    if (!do_nce) {
+      continue;
+    }
+
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
@@ -784,25 +803,25 @@
   }
 
   // Did anything change?
-  bool changed = !temp_ssa_register_v_->Equal(bb->data_flow_info->ending_null_check_v);
-  if (changed) {
+  bool nce_changed = do_nce && !temp_ssa_register_v_->Equal(bb->data_flow_info->ending_null_check_v);
+  if (nce_changed) {
     bb->data_flow_info->ending_null_check_v->Copy(temp_ssa_register_v_);
   }
-  return changed;
+  return infer_changed | nce_changed;
 }
 
-void MIRGraph::NullCheckElimination() {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination))) {
-    DCHECK(temp_ssa_register_v_ != NULL);
+void MIRGraph::NullCheckEliminationAndTypeInference() {
+  DCHECK(temp_ssa_register_v_ != NULL);
+  if ((cu_->disable_opt & (1 << kNullCheckElimination)) == 0) {
     AllNodesIterator iter(this);
     for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
       NullCheckEliminationInit(bb);
     }
-    RepeatingPreOrderDfsIterator iter2(this);
-    bool change = false;
-    for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) {
-      change = EliminateNullChecks(bb);
-    }
+  }
+  RepeatingPreOrderDfsIterator iter2(this);
+  bool change = false;
+  for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) {
+    change = EliminateNullChecksAndInferTypes(bb);
   }
   if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
     DumpCFG("/sdcard/4_post_nce_cfg/", false);
@@ -810,12 +829,14 @@
 }
 
 void MIRGraph::BasicBlockCombine() {
-  PreOrderDfsIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
-    CombineBlocks(bb);
-  }
-  if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
-    DumpCFG("/sdcard/5_post_bbcombine_cfg/", false);
+  if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
+    PreOrderDfsIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      CombineBlocks(bb);
+    }
+    if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
+      DumpCFG("/sdcard/5_post_bbcombine_cfg/", false);
+    }
   }
 }
 
@@ -868,17 +889,20 @@
   BasicBlock* start_bb = bb;
   extended_basic_blocks_.push_back(bb->id);
   bool terminated_by_return = false;
+  bool do_local_value_numbering = false;
   // Visit blocks strictly dominated by this head.
   while (bb != NULL) {
     bb->visited = true;
     terminated_by_return |= bb->terminated_by_return;
+    do_local_value_numbering |= bb->use_lvn;
     bb = NextDominatedBlock(bb);
   }
-  if (terminated_by_return) {
-    // This extended basic block contains a return, so mark all members.
+  if (terminated_by_return || do_local_value_numbering) {
+    // Propagate the LVN and return-domination properties to all blocks in this extended set.
     bb = start_bb;
     while (bb != NULL) {
-      bb->dominates_return = true;
+      bb->use_lvn = do_local_value_numbering;
+      bb->dominates_return = terminated_by_return;
       bb = NextDominatedBlock(bb);
     }
   }
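The gather-then-broadcast shape above is the standard way to give every member of an extended basic block the same properties; a sketch with interfaces assumed from this diff:

    // Pass 1: OR the per-block properties over the dominated chain.
    bool any_return = false, any_lvn = false;
    for (BasicBlock* b = start_bb; b != NULL; b = NextDominatedBlock(b)) {
      any_return |= b->terminated_by_return;
      any_lvn |= b->use_lvn;
    }
    // Pass 2 (skipped when both flags are false): broadcast the combined result
    // so every member sees a consistent view of the whole set.
    for (BasicBlock* b = start_bb; b != NULL; b = NextDominatedBlock(b)) {
      b->use_lvn = any_lvn;
      b->dominates_return = any_return;
    }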
@@ -889,14 +913,21 @@
 void MIRGraph::BasicBlockOptimization() {
   if (!(cu_->disable_opt & (1 << kBBOpt))) {
     DCHECK_EQ(cu_->num_compiler_temps, 0);
-    ClearAllVisitedFlags();
-    PreOrderDfsIterator iter2(this);
-    for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
-      BuildExtendedBBList(bb);
+    if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
+      ClearAllVisitedFlags();
+      PreOrderDfsIterator iter2(this);
+      for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
+        BuildExtendedBBList(bb);
+      }
+      // Perform extended basic block optimizations.
+      for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) {
+        BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i]));
+      }
     }
-    // Perform extended basic block optimizations.
-    for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) {
-      BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i]));
+  } else {
+    PreOrderDfsIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      BasicBlockOpt(bb);
     }
   }
   if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 07bd2aa..e5b4876 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -705,7 +705,7 @@
   /* Prep Src and Dest locations */
   int next_sreg = 0;
   int next_loc = 0;
-  int attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
index 257b2c4..59f7202 100644
--- a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
@@ -66,8 +66,8 @@
 
     INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
               kIntrinsicFlagNone),
-    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
-    //           kIntrinsicFlagIsLong),
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+              kIntrinsicFlagIsLong),
     INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
               kIntrinsicFlagIsObject),
 
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 8cd7c94..395c788 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -426,9 +426,11 @@
   kThumb2Vmovd_IMM8,  // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
   kThumb2Mla,        // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0].
   kThumb2Umull,      // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
-  kThumb2Ldrex,      // ldrex [111010000101] rn[19-16] rt[11-8] [1111] imm8[7-0].
-  kThumb2Strex,      // strex [111010000100] rn[19-16] rt[11-8] rd[11-8] imm8[7-0].
-  kThumb2Clrex,      // clrex [111100111011111110000111100101111].
+  kThumb2Ldrex,      // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
+  kThumb2Ldrexd,     // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [01111111].
+  kThumb2Strex,      // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0].
+  kThumb2Strexd,     // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] rd[3-0].
+  kThumb2Clrex,      // clrex [11110011101111111000111100101111].
   kThumb2Bfi,        // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
   kThumb2Bfc,        // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
   kThumb2Dmb,        // dmb [1111001110111111100011110101] option[3-0].
@@ -447,7 +449,7 @@
   kThumb2MovImm16HST,  // Special purpose version for switch table use.
   kThumb2LdmiaWB,    // ldmia  [111010011001[ rn[19..16] mask[15..0].
   kThumb2SubsRRI12,  // setflags encoding.
-  kThumb2OrrRRRs,    // orrx [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2OrrRRRs,    // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2Push1,      // t3 encoding of push.
   kThumb2Pop1,       // t3 encoding of pop.
   kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 1c81a5a..820b3aa 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -877,8 +877,7 @@
                  "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
     ENCODING_MAP(kThumb2Mla,  0xfb000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 15, 12,
-                 IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
                  "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Umull,  0xfba00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
@@ -889,10 +888,18 @@
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
                  "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Ldrexd,      0xe8d0007f,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD,
+                 "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Strex,       0xe8400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
-                 "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone),
+                 "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Strexd,      0xe8c00070,
+                 kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
+                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE,
+                 "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
@@ -1608,7 +1615,6 @@
 
   data_offset_ = (code_buffer_.size() + 0x3) & ~0x3;
 
-  cu_->NewTimingSplit("LiteralData");
   // Install literals
   InstallLiteralPools();
 
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 9727179..e839fe5 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -561,22 +561,67 @@
 }
 
 bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  DCHECK(!is_long);  // not supported yet
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
   RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
   RegLocation rl_src_expected = info->args[4];  // int, long or Object
-  RegLocation rl_src_new_value = info->args[5];  // int, long or Object
+  // If is_long, high half is in info->args[5]
+  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
+  // If is_long, high half is in info->args[7]
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
 
+  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
+  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
+  // r_ptr (1), new_value (2), expected (2) and the ldrexd result (2). If neither expected nor
+  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
+  // into the same temps, reducing the number of required temps down to 5. We shall work
+  // around the potentially locked temp by using LR for r_ptr, unconditionally.
+  // TODO: Pass information about the need for more temps to the stack frame generation
+  // code so that we can rely on being able to allocate enough temps.
+  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
+  MarkTemp(rARM_LR);
+  FreeTemp(rARM_LR);
+  LockTemp(rARM_LR);
+  bool load_early = true;
+  if (is_long) {
+    bool expected_is_core_reg =
+        rl_src_expected.location == kLocPhysReg && !IsFpReg(rl_src_expected.low_reg);
+    bool new_value_is_core_reg =
+        rl_src_new_value.location == kLocPhysReg && !IsFpReg(rl_src_new_value.low_reg);
+    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(rl_src_expected.low_reg);
+    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(rl_src_new_value.low_reg);
+
+    if (!expected_is_good_reg && !new_value_is_good_reg) {
+      // None of expected/new_value is non-temp reg, need to load both late
+      load_early = false;
+      // Make sure they are not in the temp regs and the load will not be skipped.
+      if (expected_is_core_reg) {
+        FlushRegWide(rl_src_expected.low_reg, rl_src_expected.high_reg);
+        ClobberSReg(rl_src_expected.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
+        rl_src_expected.location = kLocDalvikFrame;
+      }
+      if (new_value_is_core_reg) {
+        FlushRegWide(rl_src_new_value.low_reg, rl_src_new_value.high_reg);
+        ClobberSReg(rl_src_new_value.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
+        rl_src_new_value.location = kLocDalvikFrame;
+      }
+    }
+  }
 
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
-  RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+  RegLocation rl_new_value;
+  if (!is_long) {
+    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+  } else if (load_early) {
+    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+  }
 
   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
     // Mark card for object assuming new value is stored.
@@ -585,7 +630,7 @@
 
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
 
-  int r_ptr = AllocTemp();
+  int r_ptr = rARM_LR;
   OpRegRegReg(kOpAdd, r_ptr, rl_object.low_reg, rl_offset.low_reg);
 
   // Free now unneeded rl_object and rl_offset to give more temps.
@@ -594,29 +639,77 @@
   ClobberSReg(rl_offset.s_reg_low);
   FreeTemp(rl_offset.low_reg);
 
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadConstant(rl_result.low_reg, 0);  // r_result := 0
+  RegLocation rl_expected;
+  if (!is_long) {
+    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+  } else if (load_early) {
+    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
+  } else {
+    rl_new_value.low_reg = rl_expected.low_reg = AllocTemp();
+    rl_new_value.high_reg = rl_expected.high_reg = AllocTemp();
+  }
 
-  // while ([r_ptr] == rExpected && r_result == 0) {
-  //   [r_ptr] <- r_new_value && r_result := success ? 0 : 1
-  //   r_result ^= 1
-  // }
-  int r_old_value = AllocTemp();
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = (tmp == 0);
+
+  int r_tmp = AllocTemp();
   LIR* target = NewLIR0(kPseudoTargetLabel);
-  NewLIR3(kThumb2Ldrex, r_old_value, r_ptr, 0);
 
-  RegLocation rl_expected = LoadValue(rl_src_expected, kCoreReg);
-  OpRegReg(kOpCmp, r_old_value, rl_expected.low_reg);
-  FreeTemp(r_old_value);  // Now unneeded.
-  OpIT(kCondEq, "TT");
-  NewLIR4(kThumb2Strex /* eq */, rl_result.low_reg, rl_new_value.low_reg, r_ptr, 0);
-  FreeTemp(r_ptr);  // Now unneeded.
-  OpRegImm(kOpXor /* eq */, rl_result.low_reg, 1);
-  OpRegImm(kOpCmp /* eq */, rl_result.low_reg, 0);
+  if (is_long) {
+    int r_tmp_high = AllocTemp();
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_expected, rl_expected.low_reg, rl_expected.high_reg);
+    }
+    NewLIR3(kThumb2Ldrexd, r_tmp, r_tmp_high, r_ptr);
+    OpRegReg(kOpSub, r_tmp, rl_expected.low_reg);
+    OpRegReg(kOpSub, r_tmp_high, rl_expected.high_reg);
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_new_value, rl_new_value.low_reg, rl_new_value.high_reg);
+    }
+    // Make sure we use an ORR that sets the condition codes.
+    if (ARM_LOWREG(r_tmp) && ARM_LOWREG(r_tmp_high)) {
+      NewLIR2(kThumbOrr, r_tmp, r_tmp_high);
+    } else {
+      NewLIR4(kThumb2OrrRRRs, r_tmp, r_tmp, r_tmp_high, 0);
+    }
+    FreeTemp(r_tmp_high);  // Now unneeded
+
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    OpIT(kCondEq, "T");
+    NewLIR4(kThumb2Strexd /* eq */, r_tmp, rl_new_value.low_reg, rl_new_value.high_reg, r_ptr);
+
+  } else {
+    NewLIR3(kThumb2Ldrex, r_tmp, r_ptr, 0);
+    OpRegReg(kOpSub, r_tmp, rl_expected.low_reg);
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    OpIT(kCondEq, "T");
+    NewLIR4(kThumb2Strex /* eq */, r_tmp, rl_new_value.low_reg, r_ptr, 0);
+  }
+
+  // One conditional slot is still left over from OpIT(kCondEq, "T") on either branch.
+  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
   OpCondBranch(kCondEq, target);
 
+  if (!load_early) {
+    FreeTemp(rl_expected.low_reg);  // Now unneeded.
+    FreeTemp(rl_expected.high_reg);  // Now unneeded.
+  }
+
+  // result := (r_tmp != 0) ? 0 : 1;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegRegImm(kOpRsub, rl_result.low_reg, r_tmp, 1);
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  OpIT(kCondCc, "");
+  LoadConstant(rl_result.low_reg, 0); /* cc */
+  FreeTemp(r_tmp);  // Now unneeded.
+
   StoreValue(rl_dest, rl_result);
 
+  // Now, restore lr to its non-temp status.
+  Clobber(rARM_LR);
+  UnmarkTemp(rARM_LR);
   return true;
 }
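A C++-level model of what the emitted ldrexd/strexd loop computes (illustrative; the real code emits Thumb2 LIR, and compare_exchange_weak stands in for the exclusive-monitor pair since both may fail spuriously):

    #include <atomic>
    #include <cstdint>

    static bool Cas64Model(std::atomic<int64_t>* addr, int64_t expected,
                           int64_t new_value) {
      int64_t old = expected;
      while (!addr->compare_exchange_weak(old, new_value)) {
        if (old != expected) {
          return false;  // Values differed: OpIT skips strexd, result := 0.
        }
        old = expected;  // Spurious strexd failure: branch back to the ldrexd.
      }
      return true;  // strexd succeeded: result := 1.
    }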
 
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 0f29578..7a2dce1 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -291,9 +291,9 @@
 
     uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
     /* Skip non-interesting instructions */
-    if ((this_lir->flags.is_nop == true) ||
-        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
-        !(target_flags & IS_LOAD)) {
+    if (!(target_flags & IS_LOAD) ||
+        (this_lir->flags.is_nop == true) ||
+        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1))) {
       continue;
     }
 
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 5f5e5e4..bd3355f 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -793,7 +793,6 @@
   }
 
   // Install literals
-  cu_->NewTimingSplit("LiteralData");
   InstallLiteralPools();
 
   // Install switch tables
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index fa9a3ad..19d04be 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -39,7 +39,7 @@
   // Prep Src and Dest locations.
   int next_sreg = 0;
   int next_loc = 0;
-  int attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ad9b0de..f8a2d03 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -87,6 +87,7 @@
 #define REG_DEF0_USE01       (REG_DEF0 | REG_USE01)
 #define REG_DEF0_USE0        (REG_DEF0 | REG_USE0)
 #define REG_DEF0_USE12       (REG_DEF0 | REG_USE12)
+#define REG_DEF0_USE123      (REG_DEF0 | REG_USE123)
 #define REG_DEF0_USE1        (REG_DEF0 | REG_USE1)
 #define REG_DEF0_USE2        (REG_DEF0 | REG_USE2)
 #define REG_DEFAD_USEAD      (REG_DEFAD_USEA | REG_USED)
@@ -98,6 +99,7 @@
 #define REG_USE02            (REG_USE0 | REG_USE2)
 #define REG_USE12            (REG_USE1 | REG_USE2)
 #define REG_USE23            (REG_USE2 | REG_USE3)
+#define REG_USE123           (REG_USE1 | REG_USE2 | REG_USE3)
 
 struct BasicBlock;
 struct CallInfo;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 2047f30..191c9c7 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1503,7 +1503,6 @@
     }
   }
 
-  cu_->NewTimingSplit("LiteralData");
   // Install literals
   InstallLiteralPools();
 
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 32fac0b..bef966c 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -121,260 +121,251 @@
  * as it doesn't propagate.  We're guaranteed at least one pass through
  * the cfg.
  */
-bool MIRGraph::InferTypeAndSize(BasicBlock* bb) {
-  MIR *mir;
-  bool changed = false;   // Did anything change?
+bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) {
+  SSARepresentation *ssa_rep = mir->ssa_rep;
+  if (ssa_rep) {
+    uint64_t attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    const int* uses = ssa_rep->uses;
+    const int* defs = ssa_rep->defs;
 
-  if (bb->data_flow_info == NULL) return false;
-  if (bb->block_type != kDalvikByteCode && bb->block_type != kEntryBlock)
-    return false;
+    // Handle defs
+    if (attrs & DF_DA) {
+      if (attrs & DF_CORE_A) {
+        changed |= SetCore(defs[0]);
+      }
+      if (attrs & DF_REF_A) {
+        changed |= SetRef(defs[0]);
+      }
+      if (attrs & DF_A_WIDE) {
+        reg_location_[defs[0]].wide = true;
+        reg_location_[defs[1]].wide = true;
+        reg_location_[defs[1]].high_word = true;
+        DCHECK_EQ(SRegToVReg(defs[0]) + 1, SRegToVReg(defs[1]));
+      }
+    }
 
-  for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    SSARepresentation *ssa_rep = mir->ssa_rep;
-    if (ssa_rep) {
-      int attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
-      const int* uses = ssa_rep->uses;
-      const int* defs = ssa_rep->defs;
+    // Handle uses
+    int next = 0;
+    if (attrs & DF_UA) {
+      if (attrs & DF_CORE_A) {
+        changed |= SetCore(uses[next]);
+      }
+      if (attrs & DF_REF_A) {
+        changed |= SetRef(uses[next]);
+      }
+      if (attrs & DF_A_WIDE) {
+        reg_location_[uses[next]].wide = true;
+        reg_location_[uses[next + 1]].wide = true;
+        reg_location_[uses[next + 1]].high_word = true;
+        DCHECK_EQ(SRegToVReg(uses[next]) + 1, SRegToVReg(uses[next + 1]));
+        next += 2;
+      } else {
+        next++;
+      }
+    }
+    if (attrs & DF_UB) {
+      if (attrs & DF_CORE_B) {
+        changed |= SetCore(uses[next]);
+      }
+      if (attrs & DF_REF_B) {
+        changed |= SetRef(uses[next]);
+      }
+      if (attrs & DF_B_WIDE) {
+        reg_location_[uses[next]].wide = true;
+        reg_location_[uses[next + 1]].wide = true;
+        reg_location_[uses[next + 1]].high_word = true;
+        DCHECK_EQ(SRegToVReg(uses[next]) + 1, SRegToVReg(uses[next + 1]));
+        next += 2;
+      } else {
+        next++;
+      }
+    }
+    if (attrs & DF_UC) {
+      if (attrs & DF_CORE_C) {
+        changed |= SetCore(uses[next]);
+      }
+      if (attrs & DF_REF_C) {
+        changed |= SetRef(uses[next]);
+      }
+      if (attrs & DF_C_WIDE) {
+        reg_location_[uses[next]].wide = true;
+        reg_location_[uses[next + 1]].wide = true;
+        reg_location_[uses[next + 1]].high_word = true;
+        DCHECK_EQ(SRegToVReg(uses[next]) + 1, SRegToVReg(uses[next + 1]));
+      }
+    }
 
-      // Handle defs
-      if (attrs & DF_DA) {
-        if (attrs & DF_CORE_A) {
-          changed |= SetCore(defs[0]);
-        }
-        if (attrs & DF_REF_A) {
-          changed |= SetRef(defs[0]);
-        }
-        if (attrs & DF_A_WIDE) {
-          reg_location_[defs[0]].wide = true;
-          reg_location_[defs[1]].wide = true;
-          reg_location_[defs[1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(defs[0])+1,
-          SRegToVReg(defs[1]));
+    // Special-case return handling
+    if ((mir->dalvikInsn.opcode == Instruction::RETURN) ||
+        (mir->dalvikInsn.opcode == Instruction::RETURN_WIDE) ||
+        (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) {
+      switch (cu_->shorty[0]) {
+          case 'I':
+            changed |= SetCore(uses[0]);
+            break;
+          case 'J':
+            changed |= SetCore(uses[0]);
+            changed |= SetCore(uses[1]);
+            reg_location_[uses[0]].wide = true;
+            reg_location_[uses[1]].wide = true;
+            reg_location_[uses[1]].high_word = true;
+            break;
+          case 'F':
+            changed |= SetFp(uses[0]);
+            break;
+          case 'D':
+            changed |= SetFp(uses[0]);
+            changed |= SetFp(uses[1]);
+            reg_location_[uses[0]].wide = true;
+            reg_location_[uses[1]].wide = true;
+            reg_location_[uses[1]].high_word = true;
+            break;
+          case 'L':
+            changed |= SetRef(uses[0]);
+            break;
+          default: break;
+      }
+    }
+
+    // Special-case handling for format 35c/3rc invokes
+    Instruction::Code opcode = mir->dalvikInsn.opcode;
+    int flags = (static_cast<int>(opcode) >= kNumPackedOpcodes)
+        ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
+    if ((flags & Instruction::kInvoke) &&
+        (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) {
+      DCHECK_EQ(next, 0);
+      int target_idx = mir->dalvikInsn.vB;
+      const char* shorty = GetShortyFromTargetIdx(target_idx);
+      // Handle result type if floating point
+      if ((shorty[0] == 'F') || (shorty[0] == 'D')) {
+        MIR* move_result_mir = FindMoveResult(bb, mir);
+        // Result might not be used at all, so no move-result
+        if (move_result_mir && (move_result_mir->dalvikInsn.opcode !=
+            Instruction::MOVE_RESULT_OBJECT)) {
+          SSARepresentation* tgt_rep = move_result_mir->ssa_rep;
+          DCHECK(tgt_rep != NULL);
+          tgt_rep->fp_def[0] = true;
+          changed |= SetFp(tgt_rep->defs[0]);
+          if (shorty[0] == 'D') {
+            tgt_rep->fp_def[1] = true;
+            changed |= SetFp(tgt_rep->defs[1]);
+          }
         }
       }
-
-      // Handles uses
-      int next = 0;
-      if (attrs & DF_UA) {
-        if (attrs & DF_CORE_A) {
-          changed |= SetCore(uses[next]);
-        }
-        if (attrs & DF_REF_A) {
-          changed |= SetRef(uses[next]);
-        }
-        if (attrs & DF_A_WIDE) {
-          reg_location_[uses[next]].wide = true;
-          reg_location_[uses[next + 1]].wide = true;
-          reg_location_[uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(uses[next])+1,
-          SRegToVReg(uses[next + 1]));
-          next += 2;
-        } else {
-          next++;
-        }
+      int num_uses = mir->dalvikInsn.vA;
+      // If this is a non-static invoke, mark implicit "this"
+      if ((mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC) &&
+          (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE)) {
+        reg_location_[uses[next]].defined = true;
+        reg_location_[uses[next]].ref = true;
+        next++;
       }
-      if (attrs & DF_UB) {
-        if (attrs & DF_CORE_B) {
-          changed |= SetCore(uses[next]);
-        }
-        if (attrs & DF_REF_B) {
-          changed |= SetRef(uses[next]);
-        }
-        if (attrs & DF_B_WIDE) {
-          reg_location_[uses[next]].wide = true;
-          reg_location_[uses[next + 1]].wide = true;
-          reg_location_[uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(uses[next])+1,
-                               SRegToVReg(uses[next + 1]));
-          next += 2;
-        } else {
-          next++;
-        }
-      }
-      if (attrs & DF_UC) {
-        if (attrs & DF_CORE_C) {
-          changed |= SetCore(uses[next]);
-        }
-        if (attrs & DF_REF_C) {
-          changed |= SetRef(uses[next]);
-        }
-        if (attrs & DF_C_WIDE) {
-          reg_location_[uses[next]].wide = true;
-          reg_location_[uses[next + 1]].wide = true;
-          reg_location_[uses[next + 1]].high_word = true;
-          DCHECK_EQ(SRegToVReg(uses[next])+1,
-          SRegToVReg(uses[next + 1]));
-        }
-      }
-
-      // Special-case return handling
-      if ((mir->dalvikInsn.opcode == Instruction::RETURN) ||
-          (mir->dalvikInsn.opcode == Instruction::RETURN_WIDE) ||
-          (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) {
-        switch (cu_->shorty[0]) {
-            case 'I':
-              changed |= SetCore(uses[0]);
+      uint32_t cpos = 1;
+      if (strlen(shorty) > 1) {
+        for (int i = next; i < num_uses;) {
+          DCHECK_LT(cpos, strlen(shorty));
+          switch (shorty[cpos++]) {
+            case 'D':
+              ssa_rep->fp_use[i] = true;
+              ssa_rep->fp_use[i+1] = true;
+              reg_location_[uses[i]].wide = true;
+              reg_location_[uses[i+1]].wide = true;
+              reg_location_[uses[i+1]].high_word = true;
+              DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
+              i++;
               break;
             case 'J':
-              changed |= SetCore(uses[0]);
-              changed |= SetCore(uses[1]);
-              reg_location_[uses[0]].wide = true;
-              reg_location_[uses[1]].wide = true;
-              reg_location_[uses[1]].high_word = true;
+              reg_location_[uses[i]].wide = true;
+              reg_location_[uses[i+1]].wide = true;
+              reg_location_[uses[i+1]].high_word = true;
+              DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
+              changed |= SetCore(uses[i]);
+              i++;
               break;
             case 'F':
-              changed |= SetFp(uses[0]);
-              break;
-            case 'D':
-              changed |= SetFp(uses[0]);
-              changed |= SetFp(uses[1]);
-              reg_location_[uses[0]].wide = true;
-              reg_location_[uses[1]].wide = true;
-              reg_location_[uses[1]].high_word = true;
+              ssa_rep->fp_use[i] = true;
               break;
             case 'L':
-              changed |= SetRef(uses[0]);
+              changed |= SetRef(uses[i]);
               break;
-            default: break;
+            default:
+              changed |= SetCore(uses[i]);
+              break;
+          }
+          i++;
         }
       }
+    }
 
-      // Special-case handling for format 35c/3rc invokes
-      Instruction::Code opcode = mir->dalvikInsn.opcode;
-      int flags = (static_cast<int>(opcode) >= kNumPackedOpcodes)
-          ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
-      if ((flags & Instruction::kInvoke) &&
-          (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) {
-        DCHECK_EQ(next, 0);
-        int target_idx = mir->dalvikInsn.vB;
-        const char* shorty = GetShortyFromTargetIdx(target_idx);
-        // Handle result type if floating point
-        if ((shorty[0] == 'F') || (shorty[0] == 'D')) {
-          MIR* move_result_mir = FindMoveResult(bb, mir);
-          // Result might not be used at all, so no move-result
-          if (move_result_mir && (move_result_mir->dalvikInsn.opcode !=
-              Instruction::MOVE_RESULT_OBJECT)) {
-            SSARepresentation* tgt_rep = move_result_mir->ssa_rep;
-            DCHECK(tgt_rep != NULL);
-            tgt_rep->fp_def[0] = true;
-            changed |= SetFp(tgt_rep->defs[0]);
-            if (shorty[0] == 'D') {
-              tgt_rep->fp_def[1] = true;
-              changed |= SetFp(tgt_rep->defs[1]);
-            }
-          }
-        }
-        int num_uses = mir->dalvikInsn.vA;
-        // If this is a non-static invoke, mark implicit "this"
-        if (((mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC) &&
-            (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) {
-          reg_location_[uses[next]].defined = true;
-          reg_location_[uses[next]].ref = true;
-          next++;
-        }
-        uint32_t cpos = 1;
-        if (strlen(shorty) > 1) {
-          for (int i = next; i < num_uses;) {
-            DCHECK_LT(cpos, strlen(shorty));
-            switch (shorty[cpos++]) {
-              case 'D':
-                ssa_rep->fp_use[i] = true;
-                ssa_rep->fp_use[i+1] = true;
-                reg_location_[uses[i]].wide = true;
-                reg_location_[uses[i+1]].wide = true;
-                reg_location_[uses[i+1]].high_word = true;
-                DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
-                i++;
-                break;
-              case 'J':
-                reg_location_[uses[i]].wide = true;
-                reg_location_[uses[i+1]].wide = true;
-                reg_location_[uses[i+1]].high_word = true;
-                DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1]));
-                changed |= SetCore(uses[i]);
-                i++;
-                break;
-              case 'F':
-                ssa_rep->fp_use[i] = true;
-                break;
-              case 'L':
-                changed |= SetRef(uses[i]);
-                break;
-              default:
-                changed |= SetCore(uses[i]);
-                break;
-            }
-            i++;
-          }
-        }
+    for (int i = 0; ssa_rep->fp_use && i < ssa_rep->num_uses; i++) {
+      if (ssa_rep->fp_use[i]) {
+        changed |= SetFp(uses[i]);
+      }
+    }
-
-      for (int i = 0; ssa_rep->fp_use && i< ssa_rep->num_uses; i++) {
-        if (ssa_rep->fp_use[i])
-          changed |= SetFp(uses[i]);
-        }
-      for (int i = 0; ssa_rep->fp_def && i< ssa_rep->num_defs; i++) {
-        if (ssa_rep->fp_def[i])
-          changed |= SetFp(defs[i]);
-        }
-      // Special-case handling for moves & Phi
-      if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
-        /*
-         * If any of our inputs or outputs is defined, set all.
-         * Some ugliness related to Phi nodes and wide values.
-         * The Phi set will include all low words or all high
-         * words, so we have to treat them specially.
-         */
-        bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) ==
-                      kMirOpPhi);
-        RegLocation rl_temp = reg_location_[defs[0]];
-        bool defined_fp = rl_temp.defined && rl_temp.fp;
-        bool defined_core = rl_temp.defined && rl_temp.core;
-        bool defined_ref = rl_temp.defined && rl_temp.ref;
-        bool is_wide = rl_temp.wide || ((attrs & DF_A_WIDE) != 0);
-        bool is_high = is_phi && rl_temp.wide && rl_temp.high_word;
-        for (int i = 0; i < ssa_rep->num_uses; i++) {
-          rl_temp = reg_location_[uses[i]];
-          defined_fp |= rl_temp.defined && rl_temp.fp;
-          defined_core |= rl_temp.defined && rl_temp.core;
-          defined_ref |= rl_temp.defined && rl_temp.ref;
-          is_wide |= rl_temp.wide;
-          is_high |= is_phi && rl_temp.wide && rl_temp.high_word;
-        }
-        /*
-         * We don't normally expect to see a Dalvik register definition used both as a
-         * floating point and core value, though technically it could happen with constants.
-         * Until we have proper typing, detect this situation and disable register promotion
-         * (which relies on the distinction between core a fp usages).
-         */
-        if ((defined_fp && (defined_core | defined_ref)) &&
-            ((cu_->disable_opt & (1 << kPromoteRegs)) == 0)) {
-          LOG(WARNING) << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-                       << " op at block " << bb->id
-                       << " has both fp and core/ref uses for same def.";
-          cu_->disable_opt |= (1 << kPromoteRegs);
-        }
-        changed |= SetFp(defs[0], defined_fp);
-        changed |= SetCore(defs[0], defined_core);
-        changed |= SetRef(defs[0], defined_ref);
-        changed |= SetWide(defs[0], is_wide);
-        changed |= SetHigh(defs[0], is_high);
-        if (attrs & DF_A_WIDE) {
-          changed |= SetWide(defs[1]);
-          changed |= SetHigh(defs[1]);
-        }
-        for (int i = 0; i < ssa_rep->num_uses; i++) {
-          changed |= SetFp(uses[i], defined_fp);
-          changed |= SetCore(uses[i], defined_core);
-          changed |= SetRef(uses[i], defined_ref);
-          changed |= SetWide(uses[i], is_wide);
-          changed |= SetHigh(uses[i], is_high);
-        }
-        if (attrs & DF_A_WIDE) {
-          DCHECK_EQ(ssa_rep->num_uses, 2);
-          changed |= SetWide(uses[1]);
-          changed |= SetHigh(uses[1]);
-        }
+    for (int i = 0; ssa_rep->fp_def && i < ssa_rep->num_defs; i++) {
+      if (ssa_rep->fp_def[i]) {
+        changed |= SetFp(defs[i]);
+      }
+    }
+    // Special-case handling for moves & Phi
+    if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
+      /*
+       * If any of our inputs or outputs is defined, set all.
+       * Some ugliness related to Phi nodes and wide values.
+       * The Phi set will include all low words or all high
+       * words, so we have to treat them specially.
+       */
+      bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi);
+      RegLocation rl_temp = reg_location_[defs[0]];
+      bool defined_fp = rl_temp.defined && rl_temp.fp;
+      bool defined_core = rl_temp.defined && rl_temp.core;
+      bool defined_ref = rl_temp.defined && rl_temp.ref;
+      bool is_wide = rl_temp.wide || ((attrs & DF_A_WIDE) != 0);
+      bool is_high = is_phi && rl_temp.wide && rl_temp.high_word;
+      for (int i = 0; i < ssa_rep->num_uses; i++) {
+        rl_temp = reg_location_[uses[i]];
+        defined_fp |= rl_temp.defined && rl_temp.fp;
+        defined_core |= rl_temp.defined && rl_temp.core;
+        defined_ref |= rl_temp.defined && rl_temp.ref;
+        is_wide |= rl_temp.wide;
+        is_high |= is_phi && rl_temp.wide && rl_temp.high_word;
+      }
+      /*
+       * We don't normally expect to see a Dalvik register definition used both as a
+       * floating point and core value, though technically it could happen with constants.
+       * Until we have proper typing, detect this situation and disable register promotion
+       * (which relies on the distinction between core and fp usages).
+       */
+      if ((defined_fp && (defined_core | defined_ref)) &&
+          ((cu_->disable_opt & (1 << kPromoteRegs)) == 0)) {
+        LOG(WARNING) << PrettyMethod(cu_->method_idx, *cu_->dex_file)
+                     << " op at block " << bb->id
+                     << " has both fp and core/ref uses for same def.";
+        cu_->disable_opt |= (1 << kPromoteRegs);
+      }
+      changed |= SetFp(defs[0], defined_fp);
+      changed |= SetCore(defs[0], defined_core);
+      changed |= SetRef(defs[0], defined_ref);
+      changed |= SetWide(defs[0], is_wide);
+      changed |= SetHigh(defs[0], is_high);
+      if (attrs & DF_A_WIDE) {
+        changed |= SetWide(defs[1]);
+        changed |= SetHigh(defs[1]);
+      }
+      for (int i = 0; i < ssa_rep->num_uses; i++) {
+        changed |= SetFp(uses[i], defined_fp);
+        changed |= SetCore(uses[i], defined_core);
+        changed |= SetRef(uses[i], defined_ref);
+        changed |= SetWide(uses[i], is_wide);
+        changed |= SetHigh(uses[i], is_high);
+      }
+      if (attrs & DF_A_WIDE) {
+        DCHECK_EQ(ssa_rep->num_uses, 2);
+        changed |= SetWide(uses[1]);
+        changed |= SetHigh(uses[1]);
       }
     }
   }
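
The invoke-typing code above is driven entirely by the callee's shorty: character 0
gives the return type, and each following character types one argument in order, with
'J' and 'D' claiming two consecutive SSA slots (low word plus high word). A minimal
sketch of that walk, with hypothetical names (ForEachArgSlot, ArgKind) standing in
for MIRGraph's real fields and setters:

    // Sketch only; illustrates the shorty walk, not ART's actual API.
    #include <cstddef>
    #include <cstring>

    enum ArgKind { kCore, kFp, kRef };

    // shorty[0] is the return type; shorty[1..] types the arguments.
    // Wide kinds ('J', 'D') occupy two consecutive vreg/SSA slots.
    template <typename Visitor>
    void ForEachArgSlot(const char* shorty, Visitor visit) {
      for (size_t cpos = 1; cpos < strlen(shorty); ++cpos) {
        switch (shorty[cpos]) {
          case 'D': visit(kFp, /*wide=*/ true); break;
          case 'J': visit(kCore, /*wide=*/ true); break;
          case 'F': visit(kFp, /*wide=*/ false); break;
          case 'L': visit(kRef, /*wide=*/ false); break;
          default:  visit(kCore, /*wide=*/ false); break;
        }
      }
    }

In the hunk itself the visitor's work is done inline, which is why the 'D' and 'J'
cases advance the use index an extra step before the shared increment at the bottom
of the loop.
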
@@ -417,13 +408,7 @@
                                      INVALID_REG, INVALID_REG, INVALID_SREG,
                                      INVALID_SREG};
 
-/*
- * Simple register allocation.  Some Dalvik virtual registers may
- * be promoted to physical registers.  Most of the work for temp
- * allocation is done on the fly.  We also do some initialization and
- * type inference here.
- */
-void MIRGraph::BuildRegLocations() {
+void MIRGraph::InitRegLocations() {
   /* Allocate the location map */
   RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(GetNumSSARegs() * sizeof(*loc),
                                                              ArenaAllocator::kAllocRegAlloc));
@@ -493,19 +478,14 @@
         s_reg++;
       }
   }
+}
 
-  /* Do type & size inference pass */
-  RepeatingPreOrderDfsIterator iter(this);
-  bool change = false;
-  for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) {
-    change = InferTypeAndSize(bb);
-  }
-
-  /*
-   * Set the s_reg_low field to refer to the pre-SSA name of the
-   * base Dalvik virtual register.  Once we add a better register
-   * allocator, remove this remapping.
-   */
+/*
+ * Set the s_reg_low field to refer to the pre-SSA name of the
+ * base Dalvik virtual register.  Once we add a better register
+ * allocator, remove this remapping.
+ */
+void MIRGraph::RemapRegLocations() {
   for (int i = 0; i < GetNumSSARegs(); i++) {
     if (reg_location_[i].location != kLocCompilerTemp) {
       int orig_sreg = reg_location_[i].s_reg_low;
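
The split leaves InitRegLocations with pure setup, while the inference it used to
drive (the loop deleted above) is the classic dataflow fixed point: rerun the block
walk until a complete pass reports no change, which is what the repeating iterator's
Next(change) protocol encodes. A hedged stand-in for that driver, with stub types
rather than ART's MIRGraph and RepeatingPreOrderDfsIterator:

    #include <vector>

    struct BasicBlockStub {};  // stand-in for ART's BasicBlock

    // Returns true if it refined any type bit for the block.
    using BlockPass = bool (*)(BasicBlockStub*);

    void RunToFixedPoint(std::vector<BasicBlockStub*>& blocks, BlockPass pass) {
      bool changed = true;
      while (changed) {        // repeat the whole walk after any change
        changed = false;
        for (BasicBlockStub* bb : blocks) {
          changed |= pass(bb);
        }
      }
    }

Termination follows because each pass can only set type bits, never clear them, so
the lattice climb is finite.
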
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 936fb07..90d84d5 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -440,18 +440,34 @@
               if (op3 == 0) {   // op3 is 00, op4 is 00
                 opcode << "strex";
                 args << Rd << ", " << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
+                if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 || Rn.r == 15 ||
+                    Rd.r == Rn.r || Rd.r == Rt.r) {
+                  args << " (UNPREDICTABLE)";
+                }
               } else {          // op3 is 01, op4 is 00
                 // this is one of strexb, strexh or strexd
                 int op5 = (instr >> 4) & 0xf;
                 switch (op5) {
                   case 4:
-                    opcode << "strexb";
-                    break;
                   case 5:
-                    opcode << "strexh";
+                    opcode << ((op5 == 4) ? "strexb" : "strexh");
+                    Rd = ArmRegister(instr, 0);
+                    args << Rd << ", " << Rt << ", [" << Rn << "]";
+                    if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 || Rn.r == 15 ||
+                        Rd.r == Rn.r || Rd.r == Rt.r || (instr & 0xf00) != 0xf00) {
+                      args << " (UNPREDICTABLE)";
+                    }
                     break;
                   case 7:
                     opcode << "strexd";
+                    ArmRegister Rt2 = Rd;
+                    Rd = ArmRegister(instr, 0);
+                    args << Rd << ", " << Rt << ", " << Rt2 << ", [" << Rn << "]";
+                    if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 ||
+                        Rt2.r == 13 || Rt2.r == 15 || Rn.r == 15 ||
+                        Rd.r == Rn.r || Rd.r == Rt.r || Rd.r == Rt2.r) {
+                      args << " (UNPREDICTABLE)";
+                    }
                     break;
                 }
               }
@@ -460,6 +476,9 @@
               if (op3 == 0) {   // op3 is 00, op4 is 01
                 opcode << "ldrex";
                 args << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
+                if (Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || (instr & 0xf00) != 0xf00) {
+                  args << " (UNPREDICTABLE)";
+                }
               } else {          // op3 is 01, op4 is 01
                // this is one of tbb, tbh, ldrexb, ldrexh or ldrexd
                 int op5 = (instr >> 4) & 0xf;
@@ -471,13 +490,20 @@
                     opcode << "tbh";
                     break;
                   case 4:
-                    opcode << "ldrexb";
-                    break;
                   case 5:
-                    opcode << "ldrexh";
+                    opcode << ((op5 == 4) ? "ldrexb" : "ldrexh");
+                    args << Rt << ", [" << Rn << "]";
+                    if (Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || (instr & 0xf0f) != 0xf0f) {
+                      args << " (UNPREDICTABLE)";
+                    }
                     break;
                   case 7:
                     opcode << "ldrexd";
+                    args << Rt << ", " << Rd /* Rt2 */ << ", [" << Rn << "]";
+                    if (Rt.r == 13 || Rt.r == 15 || Rd.r == 13 /* Rt2 */ || Rd.r == 15 /* Rt2 */ ||
+                        Rn.r == 15 || (instr & 0x00f) != 0x00f) {
+                      args << " (UNPREDICTABLE)";
+                    }
                     break;
                 }
               }
@@ -507,15 +533,6 @@
           }
         }
 
-
-        if (op3 == 0 && op4 == 0) {  // STREX
-          ArmRegister Rd(instr, 8);
-          opcode << "strex";
-          args << Rd << ", " << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
-        } else if (op3 == 0 && op4 == 1) {  // LDREX
-          opcode << "ldrex";
-          args << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
-        }
       } else if ((op2 & 0x60) == 0x20) {  // 01x xxxx
         // Data-processing (shifted register)
         // |111|1110|0000|0|0000|1111|1100|00|00|0000|
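
Every UNPREDICTABLE annotation added in this file follows one template: extract
Rn/Rt/Rd from their fixed Thumb2 bit fields, then flag any use of SP (r13) or PC
(r15), plus, for the store-exclusives, aliasing of the status register Rd with Rn or
Rt. An illustrative helper under the STREX T1 field layout (an assumption for the
sketch; it is not the disassembler's ArmRegister API):

    #include <cstdint>

    inline uint32_t Bits(uint32_t instr, int hi, int lo) {
      return (instr >> lo) & ((1u << (hi - lo + 1)) - 1);
    }

    // STREX T1: Rn in bits 19:16, Rt in 15:12, Rd (status) in 11:8.
    bool StrexIsUnpredictable(uint32_t instr) {
      uint32_t rn = Bits(instr, 19, 16);
      uint32_t rt = Bits(instr, 15, 12);
      uint32_t rd = Bits(instr, 11, 8);
      return rd == 13 || rd == 15 || rt == 13 || rt == 15 || rn == 15 ||
             rd == rn || rd == rt;
    }

The extra `(instr & 0xf00) != 0xf00`-style tests in the hunks check that encoding
bits the architecture reserves as all-ones actually are, which is the same
UNPREDICTABLE rule applied to non-register fields.
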
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index feb8a6c..b894c0a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -32,7 +32,7 @@
 #if defined(__APPLE__)
 #define ART_USE_FUTEXES 0
 #else
-#define ART_USE_FUTEXES !defined(__mips__)
+#define ART_USE_FUTEXES 1
 #endif
 
 // Currently Darwin doesn't support locks with timeouts.
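
With the `!defined(__mips__)` carve-out gone, every non-Apple build now takes the
futex-based code paths. For reference, a minimal Linux-only sketch of the primitive
those paths rely on, waiting on and waking a 32-bit word (simplified: no error
handling, and the atomic-to-futex address pass-through is glossed over):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <atomic>
    #include <cstdint>

    static std::atomic<int32_t> word(0);

    void WaitWhileZero() {
      while (word.load() == 0) {
        // The kernel re-checks that the word is still 0 before sleeping,
        // so a wake between our load and the syscall is not lost.
        syscall(SYS_futex, &word, FUTEX_WAIT, 0, nullptr, nullptr, 0);
      }
    }

    void StoreAndWakeOne() {
      word.store(1);
      syscall(SYS_futex, &word, FUTEX_WAKE, 1, nullptr, nullptr, 0);
    }
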
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6c4d130..52a2141 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2577,7 +2577,7 @@
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
   Thread* thread;
   JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
-  if (error != JDWP::ERR_NONE) {
+  if (error == JDWP::ERR_NONE) {
     SingleStepControl* single_step_control = thread->GetSingleStepControl();
     DCHECK(single_step_control != nullptr);
     single_step_control->is_active = false;