Merge "Revert "ART: Split out more cases of Load/StoreRef, volatile as parameter""
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 09f34b3..f916e1e 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -134,7 +134,7 @@
-# Clang on the target: only enabled for ARM64. Target builds use GCC by default.
+# Clang on the target: not enabled for any architecture below. Target builds use GCC by default.
 ART_TARGET_CLANG :=
 ART_TARGET_CLANG_arm :=
-ART_TARGET_CLANG_arm64 := true
+ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index fbb7eb3..c67a815 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -42,6 +42,11 @@
 
 $(HOST_CORE_OAT_OUT): $(HOST_CORE_IMG_OUT)
 
+IMPLICIT_CHECKS_arm := null,stack
+IMPLICIT_CHECKS_arm64 := none
+IMPLICIT_CHECKS_x86 := none
+IMPLICIT_CHECKS_x86_64 := none
+IMPLICIT_CHECKS_mips := none
 define create-oat-target-targets
 $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "target dex2oat: $$@ ($$?)"
@@ -49,6 +54,7 @@
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$$(PRELOADED_CLASSES) $$(addprefix \
 		--dex-file=,$$(TARGET_CORE_DEX_FILES)) $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \
 		--oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) \
+		--implicit-checks=$(IMPLICIT_CHECKS_$($(1)TARGET_ARCH)) \
 		--instruction-set=$$($(1)TARGET_ARCH) --instruction-set-features=$$(TARGET_INSTRUCTION_SET_FEATURES) --android-root=$$(PRODUCT_OUT)/system
 
 # This "renaming" eases declaration in art/Android.mk
@@ -58,7 +64,7 @@
 endef
 
 ifdef TARGET_2ND_ARCH
-$(eval $(call create-oat-target-targets,2ND_))
+  $(eval $(call create-oat-target-targets,2ND_))
 endif
 $(eval $(call create-oat-target-targets,))
 
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 7441dac..f098a34 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -86,7 +86,11 @@
 }
 
 size_t CompiledCode::CodeDelta() const {
-  switch (instruction_set_) {
+  return CodeDelta(instruction_set_);
+}
+
+size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
+  switch (instruction_set) {
     case kArm:
     case kArm64:
     case kMips:
@@ -98,7 +102,7 @@
       return 1;
     }
     default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set_;
+      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return 0;
   }
 }
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 23cd250..b8cd851 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -67,6 +67,7 @@
   // returns the difference between the code address and a usable PC.
   // mainly to cope with kThumb2 where the lower bit must be set.
   size_t CodeDelta() const;
+  static size_t CodeDelta(InstructionSet instruction_set);
 
   // Returns a pointer suitable for invoking the code at the argument
   // code_pointer address.  Mainly to cope with kThumb2 where the
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index b0216b5..0845656 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -783,10 +783,11 @@
                                      uint16_t class_def_idx, uint32_t method_idx,
                                      jobject class_loader, const DexFile& dex_file,
                                      void* llvm_compilation_unit) {
-  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  std::string method_name = PrettyMethod(method_idx, dex_file);
+  VLOG(compiler) << "Compiling " << method_name << "...";
   if (code_item->insns_size_in_code_units_ >= 0x10000) {
     LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
-              << " in " << PrettyMethod(method_idx, dex_file);
+              << " in " << method_name;
     return NULL;
   }
 
@@ -818,8 +819,7 @@
   cu.compiler_flip_match = false;
   bool use_match = !cu.compiler_method_match.empty();
   bool match = use_match && (cu.compiler_flip_match ^
-      (PrettyMethod(method_idx, dex_file).find(cu.compiler_method_match) !=
-       std::string::npos));
+      (method_name.find(cu.compiler_method_match) != std::string::npos));
   if (!use_match || match) {
     cu.disable_opt = kCompilerOptimizerDisableFlags;
     cu.enable_debug = kCompilerDebugFlags;
@@ -830,7 +830,7 @@
   if (gVerboseMethods.size() != 0) {
     cu.verbose = false;
     for (size_t i = 0; i < gVerboseMethods.size(); ++i) {
-      if (PrettyMethod(method_idx, dex_file).find(gVerboseMethods[i])
+      if (method_name.find(gVerboseMethods[i])
           != std::string::npos) {
         cu.verbose = true;
         break;
@@ -887,22 +887,13 @@
     cu.mir_graph->EnableOpcodeCounting();
   }
 
-  // Check early if we should skip this compilation if the profiler is enabled.
-  if (cu.compiler_driver->ProfilePresent()) {
-    std::string methodname = PrettyMethod(method_idx, dex_file);
-    if (cu.mir_graph->SkipCompilationByName(methodname)) {
-      return nullptr;
-    }
-  }
-
   /* Build the raw MIR graph */
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
   // TODO(Arm64): Remove this when we are able to compile everything.
   if (!CanCompileMethod(method_idx, dex_file, cu)) {
-    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : "
-                    << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : " << method_name;
     return nullptr;
   }
 
@@ -910,7 +901,7 @@
   std::string skip_message;
   if (cu.mir_graph->SkipCompilation(&skip_message)) {
     VLOG(compiler) << cu.instruction_set << ": Skipping method : "
-                   << PrettyMethod(method_idx, dex_file) << "  Reason = " << skip_message;
+                   << method_name << "  Reason = " << skip_message;
     return nullptr;
   }
 
@@ -918,6 +909,13 @@
   PassDriverMEOpts pass_driver(&cu);
   pass_driver.Launch();
 
+  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
+  if (cu.compiler_driver->ProfilePresent()
+      && !cu.mir_graph->MethodIsLeaf()
+      && cu.mir_graph->SkipCompilationByName(method_name)) {
+    return nullptr;
+  }
+
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
@@ -933,7 +931,7 @@
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena_stack.PeakBytesAllocated() > 256 * 1024) {
       MemStats stack_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(stack_stats);
     }
   }
   cu.arena_stack.Reset();
@@ -941,8 +939,7 @@
   CompiledMethod* result = NULL;
 
   if (cu.mir_graph->PuntToInterpreter()) {
-    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: "
-                   << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: " << method_name;
     return nullptr;
   }
 
@@ -953,21 +950,21 @@
   cu.NewTimingSplit("Cleanup");
 
   if (result) {
-    VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Compiled " << method_name;
   } else {
-    VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Deferred " << method_name;
   }
 
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) {
       MemStats mem_stats(cu.arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
   if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
     LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
-              << " " << PrettyMethod(method_idx, dex_file);
+              << " " << method_name;
   }
 
   cu.EndTiming();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 3e326f0..4a331fc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2054,7 +2054,9 @@
   ProfileFile::ProfileData data;
   if (!profile_file_.GetProfileData(&data, method_name)) {
     // Not in profile, no information can be determined.
-    VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    if (kIsDebugBuild) {
+      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    }
     return true;
   }
 
@@ -2063,13 +2065,16 @@
   // falls inside a bucket.
   bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
                  <= compiler_options_->GetTopKProfileThreshold();
-  if (compile) {
-    LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-        << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-  } else {
-    VLOG(compiler) << "not compiling method " << method_name << " because it's not part of leading "
-        << compiler_options_->GetTopKProfileThreshold() << "% samples)";
+  if (kIsDebugBuild) {
+    if (compile) {
+      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
+          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
+          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
+    } else {
+      VLOG(compiler) << "not compiling method " << method_name
+          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
+          << "% samples)";
+    }
   }
   return !compile;
 }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c6b9161..4590880 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -800,6 +800,7 @@
 size_t OatWriter::InitOatCode(size_t offset) {
   // calculate the offsets within OatHeader to executable code
   size_t old_offset = offset;
+  size_t adjusted_offset = offset;
   // required to be on a new page boundary
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
@@ -809,7 +810,8 @@
 
     #define DO_TRAMPOLINE(field, fn_name) \
       offset = CompiledCode::AlignCode(offset, instruction_set); \
-      oat_header_->Set ## fn_name ## Offset(offset); \
+      adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
+      oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
       field.reset(compiler_driver_->Create ## fn_name()); \
       offset += field->size();
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 521992a..c3a322c 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -93,15 +93,30 @@
 }
 
 template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not) {
+void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset) {
   HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
-  current_block_->AddInstruction(new (arena_) T(first, second));
-  if (is_not) {
-    current_block_->AddInstruction(new (arena_) HNot(current_block_->GetLastInstruction()));
-  }
-  current_block_->AddInstruction(new (arena_) HIf(current_block_->GetLastInstruction()));
-  HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset);
+  T* comparison = new (arena_) T(first, second);
+  current_block_->AddInstruction(comparison);
+  HInstruction* ifinst = new (arena_) HIf(comparison);
+  current_block_->AddInstruction(ifinst);
+  HBasicBlock* target = FindBlockStartingAt(dex_offset + instruction.GetTargetOffset());
+  DCHECK(target != nullptr);
+  current_block_->AddSuccessor(target);
+  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  DCHECK(target != nullptr);
+  current_block_->AddSuccessor(target);
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HGraphBuilder::If_21t(const Instruction& instruction, int32_t dex_offset) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(value, GetIntConstant(0));
+  current_block_->AddInstruction(comparison);
+  HInstruction* ifinst = new (arena_) HIf(comparison);
+  current_block_->AddInstruction(ifinst);
+  HBasicBlock* target = FindBlockStartingAt(dex_offset + instruction.GetTargetOffset());
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
   target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
@@ -340,16 +355,38 @@
       break;
     }
 
+    case Instruction::CONST: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = GetIntConstant(instruction.VRegB_31i());
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = GetIntConstant(instruction.VRegB_21h() << 16);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
     case Instruction::CONST_WIDE_16: {
       int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = GetLongConstant(instruction.VRegB_21s());
+      // Get 16 bits of constant value, sign extended to 64 bits. Narrow to int16_t and
+      // widen again rather than shifting: left-shifting a negative signed value is
+      // undefined behavior before C++20.
+      int64_t value = static_cast<int16_t>(instruction.VRegB_21s());
+      HLongConstant* constant = GetLongConstant(value);
       UpdateLocal(register_index, constant);
       break;
     }
 
     case Instruction::CONST_WIDE_32: {
       int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = GetLongConstant(instruction.VRegB_31i());
+      // Get 32 bits of constant value, sign extended to 64 bits. Narrow to int32_t and
+      // widen again rather than shifting: left-shifting a negative signed value is
+      // undefined behavior before C++20.
+      int64_t value = static_cast<int32_t>(instruction.VRegB_31i());
+      HLongConstant* constant = GetLongConstant(value);
       UpdateLocal(register_index, constant);
       break;
     }
@@ -361,26 +398,57 @@
       break;
     }
 
-    case Instruction::MOVE: {
+    case Instruction::CONST_WIDE_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
+      HLongConstant* constant = GetLongConstant(value);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    // TODO: these instructions are also used to move floating point values, so what is
+    // the type (int or float)?
+    case Instruction::MOVE:
+    case Instruction::MOVE_FROM16:
+    case Instruction::MOVE_16: {
       HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
       UpdateLocal(instruction.VRegA(), value);
       break;
     }
 
+    // TODO: these instructions are also used to move floating point values, so what is
+    // the type (long or double)?
+    case Instruction::MOVE_WIDE:
+    case Instruction::MOVE_WIDE_FROM16:
+    case Instruction::MOVE_WIDE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::MOVE_OBJECT:
+    case Instruction::MOVE_OBJECT_16:
+    case Instruction::MOVE_OBJECT_FROM16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
     case Instruction::RETURN_VOID: {
       BuildReturn(instruction, Primitive::kPrimVoid);
       break;
     }
 
-    case Instruction::IF_EQ: {
-      If_22t<HEqual>(instruction, dex_offset, false);
-      break;
-    }
+#define IF_XX(comparison, cond) \
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_offset); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_offset); break
 
-    case Instruction::IF_NE: {
-      If_22t<HEqual>(instruction, dex_offset, true);
-      break;
-    }
+    IF_XX(HEqual, EQ);
+    IF_XX(HNotEqual, NE);
+    IF_XX(HLessThan, LT);
+    IF_XX(HLessThanOrEqual, LE);
+    IF_XX(HGreaterThan, GT);
+    IF_XX(HGreaterThanOrEqual, GE);
 
     case Instruction::GOTO:
     case Instruction::GOTO_16:
@@ -500,10 +568,10 @@
     }
 
     case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE: {
+    case Instruction::MOVE_RESULT_WIDE:
+    case Instruction::MOVE_RESULT_OBJECT:
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
-    }
 
     case Instruction::NOP:
       break;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 108514a..0852a26 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -22,17 +22,11 @@
 #include "primitive.h"
 #include "utils/allocation.h"
 #include "utils/growable_array.h"
+#include "nodes.h"
 
 namespace art {
 
-class ArenaAllocator;
 class Instruction;
-class HBasicBlock;
-class HGraph;
-class HIntConstant;
-class HLongConstant;
-class HInstruction;
-class HLocal;
 
 class HGraphBuilder : public ValueObject {
  public:
@@ -90,7 +84,11 @@
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not);
+  template<typename T>
+  void If_22t(const Instruction& instruction, int32_t dex_offset);
+
+  template<typename T>
+  void If_21t(const Instruction& instruction, int32_t dex_offset);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 212a6dc..c5862da 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,35 @@
 
 namespace arm {
 
+// Maps an IfCondition to the ARM condition code that tests the same comparison.
+inline Condition ARMCondition(IfCondition cond) {
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE;
+    case kCondLT: return LT;
+    case kCondLE: return LE;
+    case kCondGT: return GT;
+    case kCondGE: return GE;
+    default: break;
+  }
+  LOG(FATAL) << "Unknown if condition";
+  return EQ;        // Unreachable.
+}
+
+inline Condition ARMOppositeCondition(IfCondition cond) {
+  switch (cond) {
+    case kCondEQ: return NE;  // !(a == b)
+    case kCondNE: return EQ;  // !(a != b)
+    case kCondLT: return GE;  // !(a <  b)
+    case kCondLE: return GT;  // !(a <= b)
+    case kCondGT: return LE;  // !(a >  b)
+    case kCondGE: return LT;  // !(a >= b)
+    default: break;
+  }
+  LOG(FATAL) << "Unknown if condition";
+  return EQ;        // Unreachable.
+}
+
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
@@ -419,33 +448,103 @@
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::Any());
   if_instr->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
-  // TODO: Generate the input as a condition, instead of materializing in a register.
-  __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(0));
-  __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()), EQ);
-  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
-    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    // Condition has been materialized, compare the output to 0
+    if (!if_instr->GetLocations()->InAt(0).IsRegister()) {
+      LOG(FATAL) << "Materialized condition is not in an ARM register";
+    }
+    __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
+           ShifterOperand(0));
+    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), EQ);
+  } else {
+    // Condition has not been materialized, use its inputs as the comparison and its
+    // condition as the branch condition.
+    __ cmp(condition->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
+           ShifterOperand(condition->GetLocations()->InAt(1).AsArm().AsCoreRegister()));
+    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
+         ARMCondition(condition->GetCondition()));
+  }
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+    __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   }
 }
 
-void LocationsBuilderARM::VisitEqual(HEqual* equal) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
+
+void LocationsBuilderARM::VisitCondition(HCondition* comp) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
-  equal->SetLocations(locations);
+  comp->SetLocations(locations);
 }
 
-void InstructionCodeGeneratorARM::VisitEqual(HEqual* equal) {
-  LocationSummary* locations = equal->GetLocations();
-  __ teq(locations->InAt(0).AsArm().AsCoreRegister(),
-         ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
-  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), EQ);
-  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), NE);
+void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) {
+  if (comp->NeedsMaterialization()) {
+    LocationSummary* locations = comp->GetLocations();
+    __ cmp(locations->InAt(0).AsArm().AsCoreRegister(),
+           ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+    __ it(ARMCondition(comp->GetCondition()), kItElse);
+    __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1),
+           ARMCondition(comp->GetCondition()));
+    __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0),
+           ARMOppositeCondition(comp->GetCondition()));
+  }
+}
+
+void LocationsBuilderARM::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 712a24c..0e2a079 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_arm32.h"
+#include "utils/arm/assembler_thumb2.h"
 
 namespace art {
 namespace arm {
@@ -180,7 +180,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Arm32Assembler assembler_;
+  Thumb2Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f4b12e2..a8ee6c0 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -34,6 +34,20 @@
 
 namespace x86 {
 
+inline Condition X86Condition(IfCondition cond) {
+  switch (cond) {
+    case kCondEQ: return kEqual;         // a == b
+    case kCondNE: return kNotEqual;      // a != b
+    case kCondLT: return kLess;          // a <  b
+    case kCondLE: return kLessEqual;     // a <= b
+    case kCondGT: return kGreater;       // a >  b
+    case kCondGE: return kGreaterEqual;  // a >= b
+    default: break;
+  }
+  LOG(FATAL) << "Unknown if condition";
+  return kEqual;
+}
+
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
@@ -421,16 +435,32 @@
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
-  // TODO: Generate the input as a condition, instead of materializing in a register.
-  Location location = if_instr->GetLocations()->InAt(0);
-  if (location.IsRegister()) {
-    __ cmpl(location.AsX86().AsCpuRegister(), Immediate(0));
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    // Materialized condition, compare against 0
+    Location lhs = if_instr->GetLocations()->InAt(0);
+    if (lhs.IsRegister()) {
+      __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
+    } else {
+      __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+    }
+    __ j(kEqual,  codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   } else {
-    __ cmpl(Address(ESP, location.GetStackIndex()), Immediate(0));
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsX86().AsCpuRegister(), rhs.AsX86().AsCpuRegister());
+    } else {
+      __ cmpl(lhs.AsX86().AsCpuRegister(), Address(ESP, rhs.GetStackIndex()));
+    }
+    __ j(X86Condition(condition->GetCondition()),
+         codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   }
-  __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
-  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
-    __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+    __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   }
 }
 
@@ -475,24 +505,74 @@
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) {
 }
 
-void LocationsBuilderX86::VisitEqual(HEqual* equal) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
+void LocationsBuilderX86::VisitCondition(HCondition* comp) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
   locations->SetOut(Location::SameAsFirstInput());
-  equal->SetLocations(locations);
+  comp->SetLocations(locations);
 }
 
-void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) {
-  LocationSummary* locations = equal->GetLocations();
-  if (locations->InAt(1).IsRegister()) {
-    __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
-            locations->InAt(1).AsX86().AsCpuRegister());
-  } else {
-    __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
-            Address(ESP, locations->InAt(1).GetStackIndex()));
+void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
+  if (comp->NeedsMaterialization()) {
+    LocationSummary* locations = comp->GetLocations();
+    if (locations->InAt(1).IsRegister()) {
+      __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
+              locations->InAt(1).AsX86().AsCpuRegister());
+    } else {
+      __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
+              Address(ESP, locations->InAt(1).GetStackIndex()));
+    }
+    __ setb(X86Condition(comp->GetCondition()), locations->Out().AsX86().AsCpuRegister());
   }
-  __ setb(kEqual, locations->Out().AsX86().AsCpuRegister());
+}
+
+void LocationsBuilderX86::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ebeef9d..283f1f5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -35,6 +35,20 @@
 
 namespace x86_64 {
 
+inline Condition X86_64Condition(IfCondition cond) {
+  switch (cond) {
+    case kCondEQ: return kEqual;         // a == b
+    case kCondNE: return kNotEqual;      // a != b
+    case kCondLT: return kLess;          // a <  b
+    case kCondLE: return kLessEqual;     // a <= b
+    case kCondGT: return kGreater;       // a >  b
+    case kCondGE: return kGreaterEqual;  // a >= b
+    default: break;
+  }
+  LOG(FATAL) << "Unknown if condition";
+  return kEqual;
+}
+
 // Some x86_64 instructions require a register to be available as temp.
 static constexpr Register TMP = R11;
 
@@ -295,16 +309,32 @@
 
 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::Any());
   if_instr->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
-  // TODO: Generate the input as a condition, instead of materializing in a register.
-  __ cmpl(if_instr->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), Immediate(0));
-  __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
-  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
-    __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    // Materialized condition, compare against 0.
+    Location lhs = if_instr->GetLocations()->InAt(0);
+    if (lhs.IsRegister()) {
+      __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0));
+    } else {
+      __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+    }
+    __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  } else {
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister());
+    __ j(X86_64Condition(condition->GetCondition()),
+         codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  }
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+    __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   }
 }
 
@@ -349,18 +379,69 @@
 void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) {
 }
 
-void LocationsBuilderX86_64::VisitEqual(HEqual* equal) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
+void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetOut(Location::SameAsFirstInput());
-  equal->SetLocations(locations);
+  comp->SetLocations(locations);
 }
 
-void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* equal) {
-  __ cmpq(equal->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(),
-          equal->GetLocations()->InAt(1).AsX86_64().AsCpuRegister());
-  __ setcc(kEqual, equal->GetLocations()->Out().AsX86_64().AsCpuRegister());
+void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
+  if (comp->NeedsMaterialization()) {
+    __ cmpq(comp->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(),
+            comp->GetLocations()->InAt(1).AsX86_64().AsCpuRegister());
+    __ setcc(X86_64Condition(comp->GetCondition()),
+             comp->GetLocations()->Out().AsX86_64().AsCpuRegister());
+  }
+}
+
+void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index c3baf1a..fd534ce 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -51,7 +51,12 @@
 static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
+  fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+#if defined(__arm__)
+  // For thumb we need the bottom bit set.
+  f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+#endif
+  int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
   }
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 371478c..c59f836 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -30,7 +30,9 @@
   graph->AddBlock(if_block);
   HInstruction* instr = new (allocator) HIntConstant(4);
   if_block->AddInstruction(instr);
-  instr = new (allocator) HIf(instr);
+  HInstruction* equal = new (allocator) HEqual(instr, instr);
+  if_block->AddInstruction(equal);
+  instr = new (allocator) HIf(equal);
   if_block->AddInstruction(instr);
   return if_block;
 }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2a97fad..490d345 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -445,4 +445,23 @@
   }
 }
 
+
+bool HCondition::NeedsMaterialization() const {
+  if (!HasOnlyOneUse()) {
+    return true;
+  }
+  HUseListNode<HInstruction>* uses = GetUses();
+  HInstruction* user = uses->GetUser();
+  if (!user->IsIf()) {
+    return true;
+  }
+
+  // TODO: should we allow intervening instructions with no side-effect between this condition
+  // and the If instruction?
+  if (GetNext() != user) {
+    return true;
+  }
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 143d5c9..503f31d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -38,6 +38,15 @@
 static const int kDefaultNumberOfPredecessors = 2;
 static const int kDefaultNumberOfBackEdges = 1;
 
+enum IfCondition {
+  kCondEQ,
+  kCondNE,
+  kCondLT,
+  kCondLE,
+  kCondGT,
+  kCondGE,
+};
+
 class HInstructionList {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -66,7 +75,7 @@
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
-        current_instruction_id_(0) { }
+        current_instruction_id_(0) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
   const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -381,7 +390,13 @@
 
 #define FOR_EACH_INSTRUCTION(M)                            \
   M(Add)                                                   \
+  M(Condition)                                             \
   M(Equal)                                                 \
+  M(NotEqual)                                              \
+  M(LessThan)                                              \
+  M(LessThanOrEqual)                                       \
+  M(GreaterThan)                                           \
+  M(GreaterThanOrEqual)                                    \
   M(Exit)                                                  \
   M(Goto)                                                  \
   M(If)                                                    \
@@ -400,6 +415,7 @@
   M(StoreLocal)                                            \
   M(Sub)                                                   \
 
+
 #define FORWARD_DECLARATION(type) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
@@ -413,7 +429,7 @@
 class HUseListNode : public ArenaObject {
  public:
   HUseListNode(T* user, size_t index, HUseListNode* tail)
-      : user_(user), index_(index), tail_(tail) { }
+      : user_(user), index_(index), tail_(tail) {}
 
   HUseListNode* GetTail() const { return tail_; }
   T* GetUser() const { return user_; }
@@ -444,7 +460,7 @@
         live_interval_(nullptr),
         lifetime_position_(kNoLifetime) {}
 
-  virtual ~HInstruction() { }
+  virtual ~HInstruction() {}
 
   HInstruction* GetNext() const { return next_; }
   HInstruction* GetPrevious() const { return previous_; }
@@ -507,6 +523,10 @@
 
   void ReplaceWith(HInstruction* instruction);
 
+  bool HasOnlyOneUse() const {
+    return uses_ != nullptr && uses_->GetTail() == nullptr;
+  }
+
 #define INSTRUCTION_TYPE_CHECK(type)                                           \
   bool Is##type() { return (As##type() != nullptr); }                          \
   virtual H##type* As##type() { return nullptr; }
@@ -616,7 +636,7 @@
 
 class HInputIterator : public ValueObject {
  public:
-  explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) { }
+  explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {}
 
   bool Done() const { return index_ == instruction_->InputCount(); }
   HInstruction* Current() const { return instruction_->InputAt(index_); }
@@ -676,7 +696,7 @@
 template<typename T, intptr_t N>
 class EmbeddedArray {
  public:
-  EmbeddedArray() : elements_() { }
+  EmbeddedArray() : elements_() {}
 
   intptr_t GetLength() const { return N; }
 
@@ -721,8 +741,8 @@
 template<intptr_t N>
 class HTemplateInstruction: public HInstruction {
  public:
-  HTemplateInstruction<N>() : inputs_() { }
-  virtual ~HTemplateInstruction() { }
+  HTemplateInstruction<N>() : inputs_() {}
+  virtual ~HTemplateInstruction() {}
 
   virtual size_t InputCount() const { return N; }
   virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; }
@@ -738,6 +758,18 @@
   friend class SsaBuilder;
 };
 
+template<intptr_t N>
+class HExpression: public HTemplateInstruction<N> {
+ public:
+  explicit HExpression<N>(Primitive::Type type) : type_(type) {}
+  virtual ~HExpression() {}
+
+  virtual Primitive::Type GetType() const { return type_; }
+
+ private:
+  const Primitive::Type type_;
+};
+
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
 class HReturnVoid : public HTemplateInstruction<0> {
@@ -800,6 +832,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGoto);
 };
 
+
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
 class HIf : public HTemplateInstruction<1> {
@@ -820,53 +853,143 @@
 
   DECLARE_INSTRUCTION(If);
 
+  virtual bool IsIfInstruction() const { return true; }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HIf);
 };
 
-class HBinaryOperation : public HTemplateInstruction<2> {
+class HBinaryOperation : public HExpression<2> {
  public:
   HBinaryOperation(Primitive::Type result_type,
                    HInstruction* left,
-                   HInstruction* right) : result_type_(result_type) {
+                   HInstruction* right) : HExpression(result_type) {
     SetRawInputAt(0, left);
     SetRawInputAt(1, right);
   }
 
   HInstruction* GetLeft() const { return InputAt(0); }
   HInstruction* GetRight() const { return InputAt(1); }
-  Primitive::Type GetResultType() const { return result_type_; }
+  Primitive::Type GetResultType() const { return GetType(); }
 
   virtual bool IsCommutative() { return false; }
-  virtual Primitive::Type GetType() const { return GetResultType(); }
 
  private:
-  const Primitive::Type result_type_;
-
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
 };
 
-
-// Instruction to check if two inputs are equal to each other.
-class HEqual : public HBinaryOperation {
+class HCondition : public HBinaryOperation {
  public:
-  HEqual(HInstruction* first, HInstruction* second)
+  HCondition(HInstruction* first, HInstruction* second)
       : HBinaryOperation(Primitive::kPrimBoolean, first, second) {}
 
   virtual bool IsCommutative() { return true; }
+  bool NeedsMaterialization() const;
 
-  virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; }
+  DECLARE_INSTRUCTION(Condition);
+
+  virtual IfCondition GetCondition() const = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HCondition);
+};
+
+// Instruction to check if two inputs are equal to each other.
+class HEqual : public HCondition {
+ public:
+  HEqual(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
 
   DECLARE_INSTRUCTION(Equal);
 
+  virtual IfCondition GetCondition() const {
+    return kCondEQ;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
 
+class HNotEqual : public HCondition {
+ public:
+  HNotEqual(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
+
+  DECLARE_INSTRUCTION(NotEqual);
+
+  virtual IfCondition GetCondition() const {
+    return kCondNE;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNotEqual);
+};
+
+class HLessThan : public HCondition {
+ public:
+  HLessThan(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
+
+  DECLARE_INSTRUCTION(LessThan);
+
+  virtual IfCondition GetCondition() const {
+    return kCondLT;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HLessThan);
+};
+
+class HLessThanOrEqual : public HCondition {
+ public:
+  HLessThanOrEqual(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
+
+  DECLARE_INSTRUCTION(LessThanOrEqual);
+
+  virtual IfCondition GetCondition() const {
+    return kCondLE;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
+};
+
+class HGreaterThan : public HCondition {
+ public:
+  HGreaterThan(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
+
+  DECLARE_INSTRUCTION(GreaterThan);
+
+  virtual IfCondition GetCondition() const {
+    return kCondGT;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
+};
+
+class HGreaterThanOrEqual : public HCondition {
+ public:
+  HGreaterThanOrEqual(HInstruction* first, HInstruction* second)
+      : HCondition(first, second) {}
+
+  DECLARE_INSTRUCTION(GreaterThanOrEqual);
+
+  virtual IfCondition GetCondition() const {
+    return kCondGE;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
+};
+
+
 // A local in the graph. Corresponds to a Dex register.
 class HLocal : public HTemplateInstruction<0> {
  public:
-  explicit HLocal(uint16_t reg_number) : reg_number_(reg_number) { }
+  explicit HLocal(uint16_t reg_number) : reg_number_(reg_number) {}
 
   DECLARE_INSTRUCTION(Local);
 
@@ -880,21 +1003,17 @@
 };
 
 // Load a given local. The local is an input of this instruction.
-class HLoadLocal : public HTemplateInstruction<1> {
+class HLoadLocal : public HExpression<1> {
  public:
-  explicit HLoadLocal(HLocal* local, Primitive::Type type) : type_(type) {
+  explicit HLoadLocal(HLocal* local, Primitive::Type type) : HExpression(type) {
     SetRawInputAt(0, local);
   }
 
-  virtual Primitive::Type GetType() const { return type_; }
-
   HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
 
   DECLARE_INSTRUCTION(LoadLocal);
 
  private:
-  const Primitive::Type type_;
-
   DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
 };
 
@@ -917,12 +1036,11 @@
 
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HTemplateInstruction<0> {
+class HIntConstant : public HExpression<0> {
  public:
-  explicit HIntConstant(int32_t value) : value_(value) { }
+  explicit HIntConstant(int32_t value) : HExpression(Primitive::kPrimInt), value_(value) {}
 
   int32_t GetValue() const { return value_; }
-  virtual Primitive::Type GetType() const { return Primitive::kPrimInt; }
 
   DECLARE_INSTRUCTION(IntConstant);
 
@@ -932,9 +1050,9 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HTemplateInstruction<0> {
+class HLongConstant : public HExpression<0> {
  public:
-  explicit HLongConstant(int64_t value) : value_(value) { }
+  explicit HLongConstant(int64_t value) : HExpression(Primitive::kPrimLong), value_(value) {}
 
   int64_t GetValue() const { return value_; }
 
@@ -1008,15 +1126,14 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeStatic);
 };
 
-class HNewInstance : public HTemplateInstruction<0> {
+class HNewInstance : public HExpression<0> {
  public:
-  HNewInstance(uint32_t dex_pc, uint16_t type_index) : dex_pc_(dex_pc), type_index_(type_index) {}
+  HNewInstance(uint32_t dex_pc, uint16_t type_index) : HExpression(Primitive::kPrimNot),
+    dex_pc_(dex_pc), type_index_(type_index) {}
 
   uint32_t GetDexPc() const { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
 
-  virtual Primitive::Type GetType() const { return Primitive::kPrimNot; }
-
   // Calls runtime so needs an environment.
   virtual bool NeedsEnvironment() const { return true; }
 
@@ -1057,15 +1174,13 @@
 
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
-class HParameterValue : public HTemplateInstruction<0> {
+class HParameterValue : public HExpression<0> {
  public:
   HParameterValue(uint8_t index, Primitive::Type parameter_type)
-      : index_(index), parameter_type_(parameter_type) {}
+      : HExpression(parameter_type), index_(index) {}
 
   uint8_t GetIndex() const { return index_; }
 
-  virtual Primitive::Type GetType() const { return parameter_type_; }
-
   DECLARE_INSTRUCTION(ParameterValue);
 
  private:
@@ -1073,19 +1188,15 @@
   // than HGraph::number_of_in_vregs_;
   const uint8_t index_;
 
-  const Primitive::Type parameter_type_;
-
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
-class HNot : public HTemplateInstruction<1> {
+class HNot : public HExpression<1> {
  public:
-  explicit HNot(HInstruction* input) {
+  explicit HNot(HInstruction* input) : HExpression(Primitive::kPrimBoolean) {
     SetRawInputAt(0, input);
   }
 
-  virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; }
-
   DECLARE_INSTRUCTION(Not);
 
  private:
@@ -1210,10 +1321,10 @@
 
 class HGraphVisitor : public ValueObject {
  public:
-  explicit HGraphVisitor(HGraph* graph) : graph_(graph) { }
-  virtual ~HGraphVisitor() { }
+  explicit HGraphVisitor(HGraph* graph) : graph_(graph) {}
+  virtual ~HGraphVisitor() {}
 
-  virtual void VisitInstruction(HInstruction* instruction) { }
+  virtual void VisitInstruction(HInstruction* instruction) {}
   virtual void VisitBasicBlock(HBasicBlock* block);
 
   void VisitInsertionOrder();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ccacbef..56029aa 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -101,10 +101,6 @@
   }
 
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
-  // The optimizing compiler currently does not have a Thumb2 assembler.
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
-  }
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
   if (codegen == nullptr) {
     if (shouldCompile) {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 348e9d4..1f4cb41 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -100,6 +100,9 @@
   interval->AddRange(start, end);
 }
 
+// TODO: make the register allocator understand instructions like HCondition
+// that may not need to be materialized; no registers need to be allocated
+// for such an instruction.
 void RegisterAllocator::AllocateRegistersInternal() {
   number_of_registers_ = processing_core_registers_
       ? codegen_->GetNumberOfCoreRegisters()
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index ac84d6a..d5225c1 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,11 +30,7 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  // NOTE: the assembler used here is ARM, not Thumb.  This is because the address
-  // returned by this function is a pointer and for thumb we would have to set the
-  // bottom bit.  It doesn't matter since the instructions generated are the same
-  // size anyway.
-  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
+  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 703d68e..92a9f53 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -329,7 +329,7 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == DB_W);      // Only writeback is supported.
+    CHECK(am == IA_W);      // Only writeback is supported.
     ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
   } else {
     EmitMultiMemOp(cond, am, true, base, regs);
@@ -352,8 +352,8 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == IA || am == IA_W);
-    Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
+    CHECK(am == DB || am == DB_W);
+    Address::Mode strmode = am == DB_W ? Address::PreIndex : Address::Offset;
     str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond);
   } else {
     EmitMultiMemOp(cond, am, false, base, regs);
@@ -642,6 +642,7 @@
            if (imm > (1 << 9)) {    // 9 bit immediate.
              return true;
            }
+           return false;      // 16 bit good.
          } else if (opcode == ADD && rd != SP && rn == SP) {   // 10 bit immediate.
            if (imm > (1 << 10)) {
              return true;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 5f3cd92..38051ea 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -748,6 +748,7 @@
                                bool* explicit_so_checks, bool* explicit_suspend_checks) {
   switch (isa) {
     case kArm:
+    case kThumb2:
       break;  // All checks implemented, leave as is.
 
     default:  // No checks implemented, reset all to explicit checks.
@@ -1039,8 +1040,8 @@
         } else {
           Usage("--implicit-checks passed non-recognized value %s", val.c_str());
         }
-        has_explicit_checks_options = true;
       }
+      has_explicit_checks_options = true;
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1170,6 +1171,7 @@
   CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
                             &explicit_suspend_checks);
 
+  LOG(INFO) << "init compiler options for explicit null: " << explicit_null_checks;
   CompilerOptions compiler_options(compiler_filter,
                                    huge_method_threshold,
                                    large_method_threshold,
@@ -1256,7 +1258,17 @@
   // TODO: Not sure whether it's a good idea to allow anything else but the runtime option in
   // this case at all, as we'll have to throw away produced code for a mismatch.
   if (!has_explicit_checks_options) {
-    if (instruction_set == kRuntimeISA) {
+    bool cross_compiling = true;
+    switch (kRuntimeISA) {
+      case kArm:
+      case kThumb2:
+        cross_compiling = instruction_set != kArm && instruction_set != kThumb2;
+        break;
+      default:
+        cross_compiling = instruction_set != kRuntimeISA;
+        break;
+    }
+    if (!cross_compiling) {
       Runtime* runtime = Runtime::Current();
       compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks());
       compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks());
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 4e4a512..1f565e5 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -269,18 +269,34 @@
         uint32_t op = (instruction >> 21) & 0xf;
         opcode = kDataProcessingOperations[op];
         bool implicit_s = ((op & ~3) == 8);  // TST, TEQ, CMP, and CMN.
-        if (implicit_s) {
-          // Rd is unused (and not shown), and we don't show the 's' suffix either.
-        } else {
+        bool is_mov = op == 0b1101 || op == 0b1111;
+        if (is_mov) {
+          // Show only Rd and Rm.
           if (s) {
-            suffixes += 's';
-          }
-          args << ArmRegister(instruction, 12) << ", ";
-        }
-        if (i) {
-          args << ArmRegister(instruction, 16) << ", " << ShiftedImmediate(instruction);
+             suffixes += 's';
+           }
+           args << ArmRegister(instruction, 12) << ", ";
+           if (i) {
+              args << ShiftedImmediate(instruction);
+            } else {
+              // TODO: Shifted register.
+              args << ArmRegister(instruction, 16) << ", " << ArmRegister(instruction, 0);
+            }
         } else {
-          args << Rm(instruction);
+          if (implicit_s) {
+            // Rd is unused (and not shown), and we don't show the 's' suffix either.
+          } else {
+            if (s) {
+              suffixes += 's';
+            }
+            args << ArmRegister(instruction, 12) << ", ";
+          }
+          if (i) {
+            args << ArmRegister(instruction, 16) << ", " << ShiftedImmediate(instruction);
+          } else {
+            // TODO: Shifted register.
+            args << ArmRegister(instruction, 16) << ", " << ArmRegister(instruction, 0);
+          }
         }
       }
       break;
@@ -1291,7 +1307,7 @@
                   int32_t imm32 = (imm8 << 24) >> 24;  // sign-extend imm8
                   if (Rn.r == 13 && P == 1 && U == 0 && W == 1 && imm32 == 4) {
                     opcode << "push";
-                    args << Rt;
+                    args << "{" << Rt << "}";
                   } else if (Rn.r == 15 || (P == 0 && W == 0)) {
                     opcode << "UNDEFINED";
                   } else {
@@ -1443,10 +1459,33 @@
             }
             args << "]";
           } else {
-            // LDRT Rt, [Rn, #imm8]            - 111 11 00 00 101 nnnn tttt 1110iiiiiiii
-            uint32_t imm8 = instr & 0xFF;
-            opcode << "ldrt";
-            args << Rt << ", [" << Rn << ", #" << imm8 << "]";
+            bool p = (instr & (1 << 10)) != 0;
+            bool w = (instr & (1 << 8)) != 0;
+            bool u = (instr & (1 << 9)) != 0;
+            if (p && u && !w) {
+              // LDRT Rt, [Rn, #imm8]            - 111 11 00 00 101 nnnn tttt 1110iiiiiiii
+              uint32_t imm8 = instr & 0xFF;
+              opcode << "ldrt";
+              args << Rt << ", [" << Rn << ", #" << imm8 << "]";
+            } else if (Rn.r == 13 && !p && u && w && (instr & 0xff) == 4) {
+              // POP
+              opcode << "pop";
+              args << "{" << Rt << "}";
+           } else {
+              bool wback = !p || w;
+              uint32_t offset = (instr & 0xff);
+              opcode << "ldr.w";
+              args << Rt << ",";
+              if (p && !wback) {
+                args << "[" << Rn << ", #" << offset << "]";
+              } else if (p && wback) {
+                args << "[" << Rn << ", #" << offset << "]!";
+              } else if (!p && wback) {
+                args << "[" << Rn << "], #" << offset;
+              } else {
+                LOG(FATAL) << p << " " << w;
+              }
+            }
           }
           break;
         }
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 55262f2..656c55b 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -529,7 +529,7 @@
 }
 
 size_t RosAlloc::Free(Thread* self, void* ptr) {
-  ReaderMutexLock rmu(self, bulk_free_lock_);
+  WriterMutexLock rmu(self, bulk_free_lock_);
   return FreeInternal(self, ptr);
 }
 
@@ -1642,7 +1642,7 @@
 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  WriterMutexLock wmu(self, bulk_free_lock_);
+  ReaderMutexLock wmu(self, bulk_free_lock_);
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
@@ -1720,7 +1720,7 @@
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-    WriterMutexLock wmu(self, bulk_free_lock_);
+    ReaderMutexLock wmu(self, bulk_free_lock_);
     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
       MutexLock mu(self, *size_bracket_locks_[idx]);
       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
@@ -1867,7 +1867,7 @@
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
       << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
   MutexLock mu(self, *Locks::thread_list_lock_);
-  WriterMutexLock wmu(self, bulk_free_lock_);
+  ReaderMutexLock wmu(self, bulk_free_lock_);
   std::vector<Run*> runs;
   {
     MutexLock mu(self, lock_);
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a439188..13f61ec 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -45,10 +45,7 @@
     byte magic_num_;  // The magic number used for debugging only.
 
     bool IsFree() const {
-      if (kIsDebugBuild) {
-        return magic_num_ == kMagicNumFree;
-      }
-      return true;
+      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
     }
     size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
       const byte* fpr_base = reinterpret_cast<const byte*>(this);
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6c1c867..8d987df 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -39,6 +39,10 @@
 #endif
 #endif
 
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
 namespace art {
 
 static std::ostream& operator<<(