Merge "Don't call ComputeEagerResolvedStrings"
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index ae814b4..977757f 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -40,6 +40,7 @@
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
   std::unique_ptr<VerifiedMethod> verified_method(new VerifiedMethod);
+  verified_method->has_verification_failures_ = method_verifier->HasFailures();
   if (compile) {
     /* Generate a register map. */
     if (!verified_method->GenerateGcMap(method_verifier)) {
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 954cbf4..437ae52 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -70,6 +70,11 @@
   // by using the check-cast elision peephole optimization in the verifier.
   bool IsSafeCast(uint32_t pc) const;
 
+  // Returns true if there were any errors during verification.
+  bool HasVerificationFailures() const {
+    return has_verification_failures_;
+  }
+
  private:
   VerifiedMethod() = default;
 
@@ -107,6 +112,8 @@
   // dex PC to dex method index or dex field index based on the instruction.
   DequickenMap dequicken_map_;
   SafeCastSet safe_cast_set_;
+
+  bool has_verification_failures_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ef47377..641d174 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2344,6 +2344,31 @@
   return it->second;
 }
 
+bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx,
+                                                     uint16_t class_def_idx,
+                                                     const DexFile& dex_file) const {
+  const VerifiedMethod* verified_method = GetVerifiedMethod(&dex_file, method_idx);
+  if (verified_method != nullptr) {
+    return !verified_method->HasVerificationFailures();
+  }
+
+  // If we can't find verification metadata, check if this is a system class (we trust that system
+  // classes have their methods verified). If it's not, be conservative and assume the method
+  // has not been verified successfully.
+
+  // TODO: When compiling the boot image it should be safe to assume that everything is verified,
+  // even if methods are not found in the verification cache.
+  const char* descriptor = dex_file.GetClassDescriptor(dex_file.GetClassDef(class_def_idx));
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  bool is_system_class = class_linker->FindSystemClass(self, descriptor) != nullptr;
+  if (!is_system_class) {
+    self->ClearException();
+  }
+  return is_system_class;
+}
+
 size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
   return non_relative_linker_patch_count_;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f1066a5..1a4ae13 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -425,6 +425,12 @@
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       LOCKS_EXCLUDED(compiled_classes_lock_);
 
+  // Checks if the specified method has been verified without failures. Returns false
+  // if the method is missing from the verification results and its class is not a system class.
+  bool IsMethodVerifiedWithoutFailures(uint32_t method_idx,
+                                       uint16_t class_def_idx,
+                                       const DexFile& dex_file) const;
+
   SwapVector<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
   SwapSrcMap* DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map);
   SwapVector<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& code);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 507b3cd..2ea9203 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2673,7 +2673,6 @@
 }
 
 void InstructionCodeGeneratorARM::VisitBooleanNot(HBooleanNot* bool_not) {
-  DCHECK_EQ(bool_not->InputAt(0)->GetType(), Primitive::kPrimBoolean);
   LocationSummary* locations = bool_not->GetLocations();
   Location out = locations->Out();
   Location in = locations->InAt(0);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f6ec729..efc41e7 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2286,7 +2286,6 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
-  DCHECK_EQ(instruction->InputAt(0)->GetType(), Primitive::kPrimBoolean);
   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
 }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6c75f34..879216d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2931,7 +2931,6 @@
 }
 
 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
-  DCHECK_EQ(bool_not->InputAt(0)->GetType(), Primitive::kPrimBoolean);
   LocationSummary* locations = bool_not->GetLocations();
   Location in = locations->InAt(0);
   Location out = locations->Out();
@@ -3865,43 +3864,23 @@
 }
 
 void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
-  ScratchRegisterScope possible_scratch(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
-  int temp = possible_scratch.GetRegister();
-  if (temp == kNoRegister) {
-    // Use the stack.
-    __ pushl(Address(ESP, src));
-    __ popl(Address(ESP, dst));
-  } else {
-    Register temp_reg = static_cast<Register>(temp);
-    __ movl(temp_reg, Address(ESP, src));
-    __ movl(Address(ESP, dst), temp_reg);
-  }
+  ScratchRegisterScope ensure_scratch(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(temp_reg, Address(ESP, src + stack_offset));
+  __ movl(Address(ESP, dst + stack_offset), temp_reg);
 }
 
 void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
-  ScratchRegisterScope possible_scratch(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
-  int temp = possible_scratch.GetRegister();
-  if (temp == kNoRegister) {
-    // Use the stack instead.
-    // Push src low word.
-    __ pushl(Address(ESP, src));
-    // Push src high word. Stack offset = 4.
-    __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
-
-    // Pop into dst high word. Stack offset = 8.
-    // Pop with ESP address uses the 'after increment' value of ESP.
-    __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
-    // Finally dst low word. Stack offset = 4.
-    __ popl(Address(ESP, dst));
-  } else {
-    Register temp_reg = static_cast<Register>(temp);
-    __ movl(temp_reg, Address(ESP, src));
-    __ movl(Address(ESP, dst), temp_reg);
-    __ movl(temp_reg, Address(ESP, src + kX86WordSize));
-    __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
-  }
+  ScratchRegisterScope ensure_scratch(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(temp_reg, Address(ESP, src + stack_offset));
+  __ movl(Address(ESP, dst + stack_offset), temp_reg);
+  __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
+  __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3966,18 +3945,10 @@
           __ xorps(dest, dest);
         } else {
           ScratchRegisterScope ensure_scratch(
-              this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
-          int temp_reg = ensure_scratch.GetRegister();
-          if (temp_reg == kNoRegister) {
-            // Avoid spilling/restoring a scratch register by using the stack.
-            __ pushl(Immediate(value));
-            __ movss(dest, Address(ESP, 0));
-            __ addl(ESP, Immediate(4));
-          } else {
-            Register temp = static_cast<Register>(temp_reg);
-            __ movl(temp, Immediate(value));
-            __ movd(dest, temp);
-          }
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
         }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
@@ -4026,96 +3997,42 @@
   }
 }
 
-void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
-  // Prefer to avoid xchg as it isn't speedy on smaller processors.
-  ScratchRegisterScope possible_scratch(
-      this, reg1, codegen_->GetNumberOfCoreRegisters());
-  int temp_reg = possible_scratch.GetRegister();
-  if (temp_reg == kNoRegister || temp_reg == reg2) {
-    __ pushl(reg1);
-    __ movl(reg1, reg2);
-    __ popl(reg2);
-  } else {
-    Register temp = static_cast<Register>(temp_reg);
-    __ movl(temp, reg1);
-    __ movl(reg1, reg2);
-    __ movl(reg2, temp);
-  }
-}
-
 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
-  ScratchRegisterScope possible_scratch(
-      this, reg, codegen_->GetNumberOfCoreRegisters());
-  int temp_reg = possible_scratch.GetRegister();
-  if (temp_reg == kNoRegister) {
-    __ pushl(Address(ESP, mem));
-    __ movl(Address(ESP, mem + kX86WordSize), reg);
-    __ popl(reg);
-  } else {
-    Register temp = static_cast<Register>(temp_reg);
-    __ movl(temp, Address(ESP, mem));
-    __ movl(Address(ESP, mem), reg);
-    __ movl(reg, temp);
-  }
+  Register suggested_scratch = reg == EAX ? EBX : EAX;
+  ScratchRegisterScope ensure_scratch(
+      this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
+  __ movl(Address(ESP, mem + stack_offset), reg);
+  __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
 }
 
 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
-  ScratchRegisterScope possible_scratch(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
-  int temp_reg = possible_scratch.GetRegister();
-  if (temp_reg == kNoRegister) {
-    __ pushl(Address(ESP, mem));
-    __ movss(Address(ESP, mem + kX86WordSize), reg);
-    __ movss(reg, Address(ESP, 0));
-    __ addl(ESP, Immediate(kX86WordSize));
-  } else {
-    Register temp = static_cast<Register>(temp_reg);
-    __ movl(temp, Address(ESP, mem));
-    __ movss(Address(ESP, mem), reg);
-    __ movd(reg, temp);
-  }
+  ScratchRegisterScope ensure_scratch(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
+  __ movl(temp_reg, Address(ESP, mem + stack_offset));
+  __ movss(Address(ESP, mem + stack_offset), reg);
+  __ movd(reg, temp_reg);
 }
 
 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
-  ScratchRegisterScope possible_scratch1(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
-  int temp_reg1 = possible_scratch1.GetRegister();
-  if (temp_reg1 == kNoRegister) {
-    // No free registers.  Use the stack.
-    __ pushl(Address(ESP, mem1));
-    __ pushl(Address(ESP, mem2 + kX86WordSize));
-    // Pop with ESP address uses the 'after increment' value of ESP.
-    __ popl(Address(ESP, mem1 + kX86WordSize));
-    __ popl(Address(ESP, mem2));
-  } else {
-    // Got the first one.  Try for a second.
-    ScratchRegisterScope possible_scratch2(
-        this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
-    int temp_reg2 = possible_scratch2.GetRegister();
-    if (temp_reg2 == kNoRegister) {
-      Register temp = static_cast<Register>(temp_reg1);
-      // Bummer.  Only have one free register to use.
-      // Save mem1 on the stack.
-      __ pushl(Address(ESP, mem1));
+  ScratchRegisterScope ensure_scratch1(
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
 
-      // Copy mem2 into mem1.
-      __ movl(temp, Address(ESP, mem2 + kX86WordSize));
-      __ movl(Address(ESP, mem1 + kX86WordSize), temp);
+  Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
+  ScratchRegisterScope ensure_scratch2(
+      this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
 
-      // Now pop mem1 into mem2.
-      // Pop with ESP address uses the 'after increment' value of ESP.
-      __ popl(Address(ESP, mem2));
-    } else {
-      // Great.  We have 2 registers to play with.
-      Register temp1 = static_cast<Register>(temp_reg1);
-      Register temp2 = static_cast<Register>(temp_reg2);
-      DCHECK_NE(temp1, temp2);
-      __ movl(temp1, Address(ESP, mem1));
-      __ movl(temp2, Address(ESP, mem2));
-      __ movl(Address(ESP, mem2), temp1);
-      __ movl(Address(ESP, mem1), temp2);
-    }
-  }
+  int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
+  stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
+  __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
+  __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
+  __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
+  __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -4124,7 +4041,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 00a4323..368ae0f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,7 +106,6 @@
   X86Assembler* GetAssembler() const;
 
  private:
-  void Exchange(Register reg1, Register Reg2);
   void Exchange(Register reg, int mem);
   void Exchange(int mem1, int mem2);
   void Exchange32(XmmRegister reg, int mem);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index aa4d7c6..a3d3490 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2986,7 +2986,6 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
-  DCHECK_EQ(bool_not->InputAt(0)->GetType(), Primitive::kPrimBoolean);
   LocationSummary* locations = bool_not->GetLocations();
   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
             locations->Out().AsRegister<CpuRegister>().AsRegister());
@@ -3837,27 +3836,15 @@
 
 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch(
-      this, TMP, codegen_->GetNumberOfCoreRegisters());
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
 
-  int temp_reg = ensure_scratch.GetRegister();
-  if (temp_reg == kNoRegister) {
-    // Use the stack as a temporary.
-    // Save mem1 on the stack.
-    __ pushq(Address(CpuRegister(RSP), mem1));
-
-    // Copy mem2 into mem1.
-    __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
-    __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
-
-    // Now pop mem1 into mem2.
-    __ popq(Address(CpuRegister(RSP), mem2));
-  } else {
-    CpuRegister temp = CpuRegister(temp_reg);
-    __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
-    __ movq(temp, Address(CpuRegister(RSP), mem2));
-    __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
-    __ movq(Address(CpuRegister(RSP), mem1), temp);
-  }
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movq(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
 }
 
 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3866,13 +3853,6 @@
   __ movd(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
-  // Prefer to avoid xchg as it isn't speedy on smaller processors.
-  __ movq(CpuRegister(TMP), reg1);
-  __ movq(reg1, reg2);
-  __ movq(reg2, CpuRegister(TMP));
-}
-
 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3885,7 +3865,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+    __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 61bf6ac..b4876ef 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,7 +118,6 @@
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
-  void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 7623e42..61a7697 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -36,7 +36,13 @@
   ASSERT_EQ(graph->GetBlocks().Size(), blocks_length);
   for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == -1) {
-      ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+      if (graph->GetBlocks().Get(i) == nullptr) {
+        // Dead block.
+      } else {
+        // Only the entry block has no dominator.
+        ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+        ASSERT_TRUE(graph->GetBlocks().Get(i)->IsEntryBlock());
+      }
     } else {
       ASSERT_NE(nullptr, graph->GetBlocks().Get(i)->GetDominator());
       ASSERT_EQ(blocks[i], graph->GetBlocks().Get(i)->GetDominator()->GetBlockId());
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 7c3c2bf..3a56c6c 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -18,6 +18,7 @@
 
 #include <map>
 #include <string>
+#include <sstream>
 
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
@@ -194,6 +195,17 @@
     }
   }
 
+  // Check Phi uniqueness (no two Phis with the same type refer to the same register).
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->AsPhi();
+    if (phi->GetNextEquivalentPhiWithSameType() != nullptr) {
+      std::stringstream type_str;
+      type_str << phi->GetType();
+      AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s",
+          phi->GetId(), phi->GetRegNumber(), type_str.str().c_str()));
+    }
+  }
+
   if (block->IsLoopHeader()) {
     CheckLoop(block);
   }
@@ -369,26 +381,40 @@
   }
 }
 
-void SSAChecker::VisitIf(HIf* instruction) {
-  VisitInstruction(instruction);
-  HInstruction* input = instruction->InputAt(0);
+void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
+  HInstruction* input = instruction->InputAt(input_index);
   if (input->IsIntConstant()) {
-    int value = input->AsIntConstant()->GetValue();
+    int32_t value = input->AsIntConstant()->GetValue();
     if (value != 0 && value != 1) {
       AddError(StringPrintf(
-          "If instruction %d has a non-Boolean constant input "
-          "whose value is: %d.",
+          "%s instruction %d has a non-Boolean constant input %d whose value is: %d.",
+          instruction->DebugName(),
           instruction->GetId(),
+          static_cast<int>(input_index),
           value));
     }
-  } else if (instruction->InputAt(0)->GetType() != Primitive::kPrimBoolean) {
+  } else if (input->GetType() == Primitive::kPrimInt && input->IsPhi()) {
+    // TODO: We need a data-flow analysis which determines if the Phi is boolean.
+  } else if (input->GetType() != Primitive::kPrimBoolean) {
     AddError(StringPrintf(
-        "If instruction %d has a non-Boolean input type: %s.",
+        "%s instruction %d has a non-Boolean input %d whose type is: %s.",
+        instruction->DebugName(),
         instruction->GetId(),
-        Primitive::PrettyDescriptor(instruction->InputAt(0)->GetType())));
+        static_cast<int>(input_index),
+        Primitive::PrettyDescriptor(input->GetType())));
   }
 }
 
+void SSAChecker::VisitIf(HIf* instruction) {
+  VisitInstruction(instruction);
+  HandleBooleanInput(instruction, 0);
+}
+
+void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) {
+  VisitInstruction(instruction);
+  HandleBooleanInput(instruction, 0);
+}
+
 void SSAChecker::VisitCondition(HCondition* op) {
   VisitInstruction(op);
   if (op->GetType() != Primitive::kPrimBoolean) {
@@ -399,37 +425,23 @@
   }
   HInstruction* lhs = op->InputAt(0);
   HInstruction* rhs = op->InputAt(1);
-  if (lhs->GetType() == Primitive::kPrimNot) {
-    if (!op->IsEqual() && !op->IsNotEqual()) {
+  if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) {
+    AddError(StringPrintf(
+        "Condition %s %d has inputs of different types: %s, and %s.",
+        op->DebugName(), op->GetId(),
+        Primitive::PrettyDescriptor(lhs->GetType()),
+        Primitive::PrettyDescriptor(rhs->GetType())));
+  }
+  if (!op->IsEqual() && !op->IsNotEqual()) {
+    if ((lhs->GetType() == Primitive::kPrimNot)) {
       AddError(StringPrintf(
           "Condition %s %d uses an object as left-hand side input.",
           op->DebugName(), op->GetId()));
-    }
-    if (rhs->IsIntConstant() && rhs->AsIntConstant()->GetValue() != 0) {
-      AddError(StringPrintf(
-          "Condition %s %d compares an object with a non-zero integer: %d.",
-          op->DebugName(), op->GetId(),
-          rhs->AsIntConstant()->GetValue()));
-    }
-  } else if (rhs->GetType() == Primitive::kPrimNot) {
-    if (!op->IsEqual() && !op->IsNotEqual()) {
+    } else if (rhs->GetType() == Primitive::kPrimNot) {
       AddError(StringPrintf(
           "Condition %s %d uses an object as right-hand side input.",
           op->DebugName(), op->GetId()));
     }
-    if (lhs->IsIntConstant() && lhs->AsIntConstant()->GetValue() != 0) {
-      AddError(StringPrintf(
-          "Condition %s %d compares a non-zero integer with an object: %d.",
-          op->DebugName(), op->GetId(),
-          lhs->AsIntConstant()->GetValue()));
-    }
-  } else if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) {
-      AddError(StringPrintf(
-          "Condition %s %d has inputs of different types: "
-          "%s, and %s.",
-          op->DebugName(), op->GetId(),
-          Primitive::PrettyDescriptor(lhs->GetType()),
-          Primitive::PrettyDescriptor(rhs->GetType())));
   }
 }
 
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 89fea0a..24fee37 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -85,6 +85,7 @@
  public:
   typedef GraphChecker super_type;
 
+  // TODO: There's no need to pass a separate allocator as we could get it from the graph.
   SSAChecker(ArenaAllocator* allocator, HGraph* graph)
     : GraphChecker(allocator, graph, "art::SSAChecker: ") {}
 
@@ -107,8 +108,11 @@
   void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
   void VisitCondition(HCondition* op) OVERRIDE;
   void VisitIf(HIf* instruction) OVERRIDE;
+  void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
   void VisitConstant(HConstant* instruction) OVERRIDE;
 
+  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
+
  private:
   DISALLOW_COPY_AND_ASSIGN(SSAChecker);
 };
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 2c17a67..6d2a8d7 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -31,6 +31,8 @@
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
+#include "dex/verified_method.h"
+#include "dex/verification_results.h"
 
 namespace art {
 
@@ -114,9 +116,11 @@
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
+  uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
+        resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
-                   << " is not inlined because its class could not be verified";
+                   << " couldn't be verified, so it cannot be inlined";
     return false;
   }
 
@@ -258,10 +262,6 @@
     graph_->SetHasArrayAccesses(true);
   }
 
-  // Now that we have inlined the callee, we need to update the next
-  // instruction id of the caller, so that new instructions added
-  // after optimizations get a unique id.
-  graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
   return true;
 }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d8a8554..5fca4fa 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -51,9 +51,7 @@
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_.Get(i);
-      for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-        RemoveAsUser(it.Current());
-      }
+      DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
       }
@@ -61,19 +59,17 @@
   }
 }
 
-void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
+void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_.Get(i);
+      // We only need to update the successor, which might be live.
       for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
         block->GetSuccessors().Get(j)->RemovePredecessor(block);
       }
-      for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-        block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false);
-      }
-      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-        block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false);
-      }
+      // Remove the block from the list of blocks, so that further analyses
+      // never see it.
+      blocks_.Put(i, nullptr);
     }
   }
 }
@@ -258,6 +254,7 @@
   // (2): Simplify loops by having only one back edge, and one preheader.
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     HBasicBlock* block = blocks_.Get(i);
+    if (block == nullptr) continue;
     if (block->GetSuccessors().Size() > 1) {
       for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
         HBasicBlock* successor = block->GetSuccessors().Get(j);
@@ -274,8 +271,9 @@
 }
 
 bool HGraph::AnalyzeNaturalLoops() const {
-  for (size_t i = 0; i < blocks_.Size(); ++i) {
-    HBasicBlock* block = blocks_.Get(i);
+  // Order does not matter.
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       HLoopInformation* info = block->GetLoopInformation();
       if (!info->Populate()) {
@@ -964,23 +962,6 @@
 }
 
 void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
-  // Walk over the entry block and:
-  // - Move constants from the entry block to the outer_graph's entry block,
-  // - Replace HParameterValue instructions with their real value.
-  // - Remove suspend checks, that hold an environment.
-  int parameter_index = 0;
-  for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
-    HInstruction* current = it.Current();
-    if (current->IsConstant()) {
-      current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
-    } else if (current->IsParameterValue()) {
-      current->ReplaceWith(invoke->InputAt(parameter_index++));
-    } else {
-      DCHECK(current->IsGoto() || current->IsSuspendCheck());
-      entry_block_->RemoveInstruction(current);
-    }
-  }
-
   if (GetBlocks().Size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
     // Put the body block's instruction into `invoke`'s block.
@@ -1106,6 +1087,36 @@
     }
   }
 
+  // Update the next instruction id of the outer graph, so that instructions
+  // added later get bigger ids than those in the inner graph.
+  outer_graph->SetCurrentInstructionId(GetNextInstructionId());
+
+  // Walk over the entry block and:
+  // - Move constants from the entry block to the outer_graph's entry block,
+  // - Replace HParameterValue instructions with their real value.
+  // - Remove suspend checks, that hold an environment.
+  // We must do this after the other blocks have been inlined, otherwise ids of
+  // constants could overlap with the inner graph.
+  int parameter_index = 0;
+  for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* current = it.Current();
+    if (current->IsNullConstant()) {
+      current->ReplaceWith(outer_graph->GetNullConstant());
+    } else if (current->IsIntConstant()) {
+      current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
+    } else if (current->IsLongConstant()) {
+      current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
+    } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
+      // TODO: Don't duplicate floating-point constants.
+      current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+    } else if (current->IsParameterValue()) {
+      current->ReplaceWith(invoke->InputAt(parameter_index++));
+    } else {
+      DCHECK(current->IsGoto() || current->IsSuspendCheck());
+      entry_block_->RemoveInstruction(current);
+    }
+  }
+
   // Finally remove the invoke from the caller.
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6fb34da..649038b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -253,7 +253,7 @@
                               ArenaBitVector* visited,
                               ArenaBitVector* visiting);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
-  void RemoveDeadBlocks(const ArenaBitVector& visited) const;
+  void RemoveDeadBlocks(const ArenaBitVector& visited);
 
   template <class InstType, typename ValueType>
   InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
@@ -2752,6 +2752,20 @@
   bool IsDead() const { return !is_live_; }
   bool IsLive() const { return is_live_; }
 
+  // Returns the next equivalent phi (starting from the current one) or null if there is none.
+  // An equivalent phi is a phi having the same dex register and type.
+  // It assumes that phis with the same dex register are adjacent.
+  HPhi* GetNextEquivalentPhiWithSameType() {
+    HInstruction* next = GetNext();
+    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
+      if (next->GetType() == GetType()) {
+        return next->AsPhi();
+      }
+      next = next->GetNext();
+    }
+    return nullptr;
+  }
+
   DECLARE_INSTRUCTION(Phi);
 
  protected:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index efca1a5..a17d6e1 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -31,6 +31,8 @@
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/verified_method.h"
+#include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
@@ -45,13 +47,13 @@
 #include "mirror/art_method-inl.h"
 #include "nodes.h"
 #include "prepare_for_register_allocation.h"
+#include "reference_type_propagation.h"
 #include "register_allocator.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
-#include "reference_type_propagation.h"
 
 namespace art {
 
@@ -592,15 +594,26 @@
                                             InvokeType invoke_type,
                                             uint16_t class_def_idx,
                                             uint32_t method_idx,
-                                            jobject class_loader,
+                                            jobject jclass_loader,
                                             const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                                      method_idx, class_loader, dex_file);
+  CompilerDriver* compiler_driver = GetCompilerDriver();
+  CompiledMethod* method = nullptr;
+  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)) {
+     method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                         method_idx, jclass_loader, dex_file);
+  } else {
+    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+      compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+    } else {
+      compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+    }
+  }
+
   if (method != nullptr) {
     return method;
   }
   method = delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
-                              class_loader, dex_file);
+                              jclass_loader, dex_file);
 
   if (method != nullptr) {
     compilation_stats_.RecordStat(MethodCompilationStat::kCompiledQuick);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 4d5b8d0..d4a936d 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -45,6 +45,8 @@
   kNotCompiledCantAccesType,
   kNotOptimizedRegisterAllocator,
   kNotCompiledUnhandledInstruction,
+  kNotCompiledVerifyAtRuntime,
+  kNotCompiledClassNotVerified,
   kRemovedCheckedCast,
   kRemovedNullCheck,
   kInstructionSimplifications,
@@ -109,6 +111,8 @@
       case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
       case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
+      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
       case kRemovedNullCheck: return "kRemovedNullCheck";
       case kInstructionSimplifications: return "kInstructionSimplifications";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 0c7f0da..ad92ca5 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -269,20 +269,6 @@
 }
 
 
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
-                                                  int register_count) {
-  int scratch = -1;
-  for (int reg = 0; reg < register_count; ++reg) {
-    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
-      scratch = reg;
-      break;
-    }
-  }
-
-  return scratch;
-}
-
-
 ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
     ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
     : resolver_(resolver),
@@ -296,16 +282,6 @@
 }
 
 
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
-    ParallelMoveResolver* resolver, int blocked, int number_of_registers)
-    : resolver_(resolver),
-      reg_(kNoRegister),
-      spilled_(false) {
-  // We don't want to spill a register if none are free.
-  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
-}
-
-
 ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
   if (spilled_) {
     resolver_->RestoreScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 36ce575..95f8ad5 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,15 +42,10 @@
  protected:
   class ScratchRegisterScope : public ValueObject {
    public:
-    // Spill a scratch register if no regs are free.
     ScratchRegisterScope(ParallelMoveResolver* resolver,
                          int blocked,
                          int if_scratch,
                          int number_of_registers);
-    // Grab a scratch register only if available.
-    ScratchRegisterScope(ParallelMoveResolver* resolver,
-                         int blocked,
-                         int number_of_registers);
     ~ScratchRegisterScope();
 
     int GetRegister() const { return reg_; }
@@ -67,8 +62,6 @@
   // Allocate a scratch register for performing a move. The method will try to use
   // a register that is the destination of a move, but that move has not been emitted yet.
   int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
-  // As above, but return -1 if no free register.
-  int AllocateScratchRegister(int blocked, int register_count);
 
   // Emit a move.
   virtual void EmitMove(size_t index) = 0;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2fbd051..a02b1da 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1422,7 +1422,6 @@
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
-  SafepointPosition* safepoint_position = interval->GetFirstSafepoint();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -1473,11 +1472,10 @@
       InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
     }
 
-    for (; safepoint_position != nullptr; safepoint_position = safepoint_position->GetNext()) {
-      if (!current->Covers(safepoint_position->GetPosition())) {
-        DCHECK(next_sibling != nullptr);
-        break;
-      }
+    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+         safepoint_position != nullptr;
+         safepoint_position = safepoint_position->GetNext()) {
+      DCHECK(current->Covers(safepoint_position->GetPosition()));
 
       LocationSummary* locations = safepoint_position->GetLocations();
       if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
@@ -1523,7 +1521,6 @@
   } while (current != nullptr);
 
   if (kIsDebugBuild) {
-    DCHECK(safepoint_position == nullptr);
     // Following uses can only be environment uses. The location for
     // these environments will be none.
     while (use != nullptr) {
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index e154ea4..5c3d9bf 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -174,6 +174,54 @@
       && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber();
 }
 
+void SsaBuilder::FixNullConstantType() {
+  // The order doesn't matter here.
+  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+    for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* equality_instr = it.Current();
+      if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
+        continue;
+      }
+      HInstruction* left = equality_instr->InputAt(0);
+      HInstruction* right = equality_instr->InputAt(1);
+      HInstruction* null_instr = nullptr;
+
+      if ((left->GetType() == Primitive::kPrimNot)
+          && (right->IsNullConstant() || right->IsIntConstant())) {
+        null_instr = right;
+      } else if ((right->GetType() == Primitive::kPrimNot)
+              && (left->IsNullConstant() || left->IsIntConstant())) {
+        null_instr = left;
+      } else {
+        continue;
+      }
+
+      // If we got here, we are comparing against a reference and the int constant
+      // should be replaced with a null constant.
+      if (null_instr->IsIntConstant()) {
+        DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue());
+        equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0);
+      }
+    }
+  }
+}
+
+void SsaBuilder::EquivalentPhisCleanup() {
+  // The order doesn't matter here.
+  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+    for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      HPhi* next = phi->GetNextEquivalentPhiWithSameType();
+      if (next != nullptr) {
+        phi->ReplaceWith(next);
+        DCHECK(next->GetNextEquivalentPhiWithSameType() == nullptr)
+            << "More then one phi equivalent with type " << phi->GetType()
+            << " found for phi" << phi->GetId();
+      }
+    }
+  }
+}
+
 void SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block visited
   // (with the exception of loops) in order to create the right environment for that
@@ -209,11 +257,21 @@
   PrimitiveTypePropagation type_propagation(GetGraph());
   type_propagation.Run();
 
-  // 5) Mark dead phis again. Steph 4) may have introduced new phis.
+  // 5) Fix the type for null constants which are part of an equality comparison.
+  FixNullConstantType();
+
+  // 6) When creating equivalent phis we copy the inputs of the original phi which
+  // may be improperly typed. This will be fixed during the type propagation but
+  // as a result we may end up with two equivalent phis with the same type for
+  // the same dex register. This pass cleans them up.
+  EquivalentPhisCleanup();
+
+  // 7) Mark dead phis again. Step 4) may have introduced new phis.
+  // Step 6) might enable the death of new phis.
   SsaDeadPhiElimination dead_phis(GetGraph());
   dead_phis.MarkDeadPhis();
 
-  // 6) Now that the graph is correclty typed, we can get rid of redundant phis.
+  // 8) Now that the graph is correctly typed, we can get rid of redundant phis.
   // Note that we cannot do this phase before type propagation, otherwise
   // we could get rid of phi equivalents, whose presence is a requirement for the
   // type propagation phase. Note that this is to satisfy statement (a) of the
@@ -221,7 +279,7 @@
   SsaRedundantPhiElimination redundant_phi(GetGraph());
   redundant_phi.Run();
 
-  // 7) Make sure environments use the right phi "equivalent": a phi marked dead
+  // 9) Make sure environments use the right phi "equivalent": a phi marked dead
   // can have a phi equivalent that is not dead. We must therefore update
   // all environment uses of the dead phi to use its equivalent. Note that there
   // can be multiple phis for the same Dex register that are live (for example
@@ -248,7 +306,7 @@
     }
   }
 
-  // 8) Deal with phis to guarantee liveness of phis in case of a debuggable
+  // 10) Deal with phis to guarantee liveness of phis in case of a debuggable
   // application. This is for satisfying statement (c) of the SsaBuilder
   // (see ssa_builder.h).
   if (GetGraph()->IsDebuggable()) {
@@ -256,7 +314,7 @@
     dead_phi_handler.Run();
   }
 
-  // 9) Now that the right phis are used for the environments, and we
+  // 11) Now that the right phis are used for the environments, and we
   // have potentially revive dead phis in case of a debuggable application,
   // we can eliminate phis we do not need. Regardless of the debuggable status,
   // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
@@ -264,7 +322,7 @@
   // input types.
   dead_phis.EliminateDeadPhis();
 
-  // 10) Clear locals.
+  // 12) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 569b3e2..265e95b 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -85,6 +85,9 @@
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
+  void FixNullConstantType();
+  void EquivalentPhisCleanup();
+
   static HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
   static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
   static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 2b51f94..98f98a2 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -492,6 +492,15 @@
     return defined_by_;
   }
 
+  SafepointPosition* FindSafepointJustBefore(size_t position) const {
+    for (SafepointPosition* safepoint = first_safepoint_, *previous = nullptr;
+         safepoint != nullptr;
+         previous = safepoint, safepoint = safepoint->GetNext()) {
+      if (safepoint->GetPosition() >= position) return previous;
+    }
+    return last_safepoint_;
+  }
+
   /**
    * Split this interval at `position`. This interval is changed to:
    * [start ... position).
@@ -510,6 +519,19 @@
     }
 
     LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_);
+    SafepointPosition* new_last_safepoint = FindSafepointJustBefore(position);
+    if (new_last_safepoint == nullptr) {
+      new_interval->first_safepoint_ = first_safepoint_;
+      new_interval->last_safepoint_ = last_safepoint_;
+      first_safepoint_ = last_safepoint_ = nullptr;
+    } else if (last_safepoint_ != new_last_safepoint) {
+      new_interval->last_safepoint_ = last_safepoint_;
+      new_interval->first_safepoint_ = new_last_safepoint->GetNext();
+      DCHECK(new_interval->first_safepoint_ != nullptr);
+      last_safepoint_ = new_last_safepoint;
+      last_safepoint_->SetNext(nullptr);
+    }
+
     new_interval->next_sibling_ = next_sibling_;
     next_sibling_ = new_interval;
     new_interval->parent_ = parent_;
@@ -748,7 +770,6 @@
   }
 
   SafepointPosition* GetFirstSafepoint() const {
-    DCHECK_EQ(GetParent(), this) << "Only the first sibling lists safepoints";
     return first_safepoint_;
   }
 
@@ -822,7 +843,7 @@
   LiveRange* first_range_;
   LiveRange* last_range_;
 
-  // Safepoints where this interval is live. Only set in the parent interval.
+  // Safepoints where this interval is live.
   SafepointPosition* first_safepoint_;
   SafepointPosition* last_safepoint_;
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index f8c0e83..51600f7 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -50,11 +50,6 @@
 
 const bool kVerboseInstrumentation = false;
 
-// Do we want to deoptimize for method entry and exit listeners or just try to intercept
-// invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
-// application's performance.
-static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = true;
-
 static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
@@ -846,8 +841,7 @@
   ConfigureStubs(false, false);
 }
 
-void Instrumentation::EnableMethodTracing() {
-  bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners;
+void Instrumentation::EnableMethodTracing(bool require_interpreter) {
   ConfigureStubs(!require_interpreter, require_interpreter);
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 41821a6..8b7fcca 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -49,6 +49,11 @@
   kNumHandlerTables
 };
 
+// Do we want to deoptimize for method entry and exit listeners or just try to intercept
+// invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
+// application's performance.
+static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = true;
+
 // Instrumentation event listener API. Registered listeners will get the appropriate call back for
 // the events they are listening for. The call backs supply the thread, method and dex_pc the event
 // occurred upon. The thread may or may not be Thread::Current().
@@ -170,7 +175,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Enable method tracing by installing instrumentation entry/exit stubs.
-  void EnableMethodTracing()
+  void EnableMethodTracing(
+      bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 022c56f..af01a02 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,14 +18,18 @@
 
 #include <stdlib.h>
 
+#include <cutils/process_name.h>
+
 #include "arch/instruction_set.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
 #include "thread-inl.h"
+#include "trace.h"
 
 #if defined(__linux__)
 #include <sys/prctl.h>
@@ -121,6 +125,11 @@
 
   runtime->PreZygoteFork();
 
+  if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
+    // Tracing active, pause it.
+    Trace::Pause();
+  }
+
   // Grab thread before fork potentially makes Thread::pthread_key_self_ unusable.
   return reinterpret_cast<jlong>(ThreadForEnv(env));
 }
@@ -132,6 +141,49 @@
   thread->InitAfterFork();
   EnableDebugFeatures(debug_flags);
 
+  // Update tracing.
+  if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
+    Trace::TraceOutputMode output_mode = Trace::GetOutputMode();
+    Trace::TraceMode trace_mode = Trace::GetMode();
+
+    // Just drop it.
+    Trace::Abort();
+
+    // Only restart if it was streaming mode.
+    // TODO: Expose buffer size, so we can also do file mode.
+    if (output_mode == Trace::TraceOutputMode::kStreaming) {
+      const char* proc_name_cutils = get_process_name();
+      std::string proc_name;
+      if (proc_name_cutils != nullptr) {
+        proc_name = proc_name_cutils;
+      }
+      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+        // Either no process name, or the name hasn't been changed, yet. Just use pid.
+        pid_t pid = getpid();
+        proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
+      }
+
+      std::string profiles_dir(GetDalvikCache("profiles", false /* create_if_absent */));
+      if (!profiles_dir.empty()) {
+        std::string trace_file = StringPrintf("%s/%s.trace.bin", profiles_dir.c_str(),
+                                              proc_name.c_str());
+        Trace::Start(trace_file.c_str(),
+                     -1,
+                     -1,  // TODO: Expose buffer size.
+                     0,   // TODO: Expose flags.
+                     output_mode,
+                     trace_mode,
+                     0);  // TODO: Expose interval.
+        if (thread->IsExceptionPending()) {
+          ScopedObjectAccess soa(env);
+          thread->ClearException();
+        }
+      } else {
+        LOG(ERROR) << "Profiles dir is empty?!?!";
+      }
+    }
+  }
+
   if (instruction_set != nullptr) {
     ScopedUtfChars isa_string(env, instruction_set);
     InstructionSet isa = GetInstructionSetFromString(isa_string.c_str());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index c23f744..0758b27 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -216,6 +216,8 @@
       .Define("-Xmethod-trace-file-size:_")
           .WithType<unsigned int>()
           .IntoKey(M::MethodTraceFileSize)
+      .Define("-Xmethod-trace-stream")
+          .IntoKey(M::MethodTraceStreaming)
       .Define("-Xprofile:_")
           .WithType<TraceClockSource>()
           .WithValueMap({{"threadcpuclock", TraceClockSource::kThreadCpu},
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 543b9dc..7bebb96 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -552,6 +552,17 @@
     StartProfiler(profile_output_filename_.c_str());
   }
 
+  if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
+    ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
+    Trace::Start(trace_config_->trace_file.c_str(),
+                 -1,
+                 static_cast<int>(trace_config_->trace_file_size),
+                 0,
+                 trace_config_->trace_output_mode,
+                 trace_config_->trace_mode,
+                 0);
+  }
+
   return true;
 }
 
@@ -1000,7 +1011,9 @@
     trace_config_->trace_file = runtime_options.ReleaseOrDefault(Opt::MethodTraceFile);
     trace_config_->trace_file_size = runtime_options.ReleaseOrDefault(Opt::MethodTraceFileSize);
     trace_config_->trace_mode = Trace::TraceMode::kMethodTracing;
-    trace_config_->trace_output_mode = Trace::TraceOutputMode::kFile;
+    trace_config_->trace_output_mode = runtime_options.Exists(Opt::MethodTraceStreaming) ?
+        Trace::TraceOutputMode::kStreaming :
+        Trace::TraceOutputMode::kFile;
   }
 
   {
@@ -1026,17 +1039,6 @@
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(runtime_options.GetOrDefault(Opt::ProfileClock));
 
-  if (trace_config_.get() != nullptr) {
-    ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
-    Trace::Start(trace_config_->trace_file.c_str(),
-                 -1,
-                 static_cast<int>(trace_config_->trace_file_size),
-                 0,
-                 trace_config_->trace_output_mode,
-                 trace_config_->trace_mode,
-                 0);
-  }
-
   // Pre-allocate an OutOfMemoryError for the double-OOME case.
   self->ThrowNewException("Ljava/lang/OutOfMemoryError;",
                           "OutOfMemoryError thrown while trying to throw OutOfMemoryError; "
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 339f925..eff787a 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -95,6 +95,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/method-trace-file.bin")
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
+RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
 RUNTIME_OPTIONS_KEY (TestProfilerOptions, ProfilerOpts)  // -Xenable-profiler, -Xprofile-*
 RUNTIME_OPTIONS_KEY (std::string,         Compiler)
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 7326865..5322f9f 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -31,7 +31,7 @@
 #include "instrumentation.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
+#include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "os.h"
@@ -85,9 +85,12 @@
     kTraceMethodActionMask = 0x03,  // two bits
 };
 
+static constexpr uint8_t kOpNewMethod = 1U;
+static constexpr uint8_t kOpNewThread = 2U;
+
 class BuildStackTraceVisitor : public StackVisitor {
  public:
-  explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, NULL),
+  explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, nullptr),
       method_trace_(Trace::AllocStackTrace()) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -118,7 +121,7 @@
 
 TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;
 
-Trace* volatile Trace::the_trace_ = NULL;
+Trace* volatile Trace::the_trace_ = nullptr;
 pthread_t Trace::sampling_pthread_ = 0U;
 std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
@@ -138,7 +141,7 @@
 }
 
 std::vector<mirror::ArtMethod*>* Trace::AllocStackTrace() {
-  if (temp_stack_trace_.get() != NULL) {
+  if (temp_stack_trace_.get() != nullptr) {
     return temp_stack_trace_.release();
   } else {
     return new std::vector<mirror::ArtMethod*>();
@@ -246,7 +249,7 @@
 static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
   thread->SetTraceClockBase(0);
   std::vector<mirror::ArtMethod*>* stack_trace = thread->GetStackTraceSample();
-  thread->SetStackTraceSample(NULL);
+  thread->SetStackTraceSample(nullptr);
   delete stack_trace;
 }
 
@@ -260,7 +263,7 @@
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
-  if (old_stack_trace == NULL) {
+  if (old_stack_trace == nullptr) {
     // If there's no previous stack trace sample for this thread, log an entry event for all
     // methods in the trace.
     for (std::vector<mirror::ArtMethod*>::reverse_iterator rit = stack_trace->rbegin();
@@ -308,7 +311,7 @@
     {
       MutexLock mu(self, *Locks::trace_lock_);
       the_trace = the_trace_;
-      if (the_trace == NULL) {
+      if (the_trace == nullptr) {
         break;
       }
     }
@@ -323,7 +326,7 @@
   }
 
   runtime->DetachCurrentThread();
-  return NULL;
+  return nullptr;
 }
 
 void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
@@ -331,7 +334,7 @@
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::trace_lock_);
-    if (the_trace_ != NULL) {
+    if (the_trace_ != nullptr) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
       return;
     }
@@ -354,7 +357,7 @@
       trace_file.reset(new File(trace_fd, "tracefile"));
       trace_file->DisableAutoClose();
     }
-    if (trace_file.get() == NULL) {
+    if (trace_file.get() == nullptr) {
       PLOG(ERROR) << "Unable to open trace file '" << trace_filename << "'";
       ScopedObjectAccess soa(self);
       ThrowRuntimeException("Unable to open trace file '%s'", trace_filename);
@@ -372,20 +375,23 @@
   // Create Trace object.
   {
     MutexLock mu(self, *Locks::trace_lock_);
-    if (the_trace_ != NULL) {
+    if (the_trace_ != nullptr) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
     } else {
       enable_stats = (flags && kTraceCountAllocs) != 0;
-      the_trace_ = new Trace(trace_file.release(), buffer_size, flags, trace_mode);
+      the_trace_ = new Trace(trace_file.release(), trace_filename, buffer_size, flags, output_mode,
+                             trace_mode);
       if (trace_mode == TraceMode::kSampling) {
-        CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, NULL, &RunSamplingThread,
+        CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
                                             reinterpret_cast<void*>(interval_us)),
                                             "Sampling profiler thread");
+        the_trace_->interval_us_ = interval_us;
       } else {
         runtime->GetInstrumentation()->AddListener(the_trace_,
                                                    instrumentation::Instrumentation::kMethodEntered |
                                                    instrumentation::Instrumentation::kMethodExited |
                                                    instrumentation::Instrumentation::kMethodUnwind);
+        // TODO: In full-PIC mode, we don't need to fully deopt.
         runtime->GetInstrumentation()->EnableMethodTracing();
       }
     }
@@ -399,18 +405,18 @@
   }
 }
 
-void Trace::Stop() {
+void Trace::StopTracing(bool finish_tracing, bool flush_file) {
   bool stop_alloc_counting = false;
   Runtime* const runtime = Runtime::Current();
   Trace* the_trace = nullptr;
   pthread_t sampling_pthread = 0U;
   {
     MutexLock mu(Thread::Current(), *Locks::trace_lock_);
-    if (the_trace_ == NULL) {
+    if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace stop requested, but no trace currently running";
     } else {
       the_trace = the_trace_;
-      the_trace_ = NULL;
+      the_trace_ = nullptr;
       sampling_pthread = sampling_pthread_;
     }
   }
@@ -418,13 +424,16 @@
   // the sampling thread access a stale pointer. This finishes since the sampling thread exits when
   // the_trace_ is null.
   if (sampling_pthread != 0U) {
-    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, NULL), "sampling thread shutdown");
+    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
     sampling_pthread_ = 0U;
   }
   runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+
   if (the_trace != nullptr) {
-    stop_alloc_counting = (the_trace->flags_ & kTraceCountAllocs) != 0;
-    the_trace->FinishTracing();
+    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+    if (finish_tracing) {
+      the_trace->FinishTracing();
+    }
 
     if (the_trace->trace_mode_ == TraceMode::kSampling) {
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
@@ -438,8 +447,12 @@
     }
     if (the_trace->trace_file_.get() != nullptr) {
       // Do not try to erase, so flush and close explicitly.
-      if (the_trace->trace_file_->Flush() != 0) {
-        PLOG(ERROR) << "Could not flush trace file.";
+      if (flush_file) {
+        if (the_trace->trace_file_->Flush() != 0) {
+          PLOG(ERROR) << "Could not flush trace file.";
+        }
+      } else {
+        the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
       }
       if (the_trace->trace_file_->Close() != 0) {
         PLOG(ERROR) << "Could not close trace file.";
@@ -454,15 +467,118 @@
   }
 }
 
+void Trace::Abort() {
+  // Do not write anything anymore.
+  StopTracing(false, false);
+}
+
+void Trace::Stop() {
+  // Finish writing.
+  StopTracing(true, true);
+}
+
 void Trace::Shutdown() {
   if (GetMethodTracingMode() != kTracingInactive) {
     Stop();
   }
 }
 
+void Trace::Pause() {
+  bool stop_alloc_counting = false;
+  Runtime* runtime = Runtime::Current();
+  Trace* the_trace = nullptr;
+
+  pthread_t sampling_pthread = 0U;
+  {
+    MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+    if (the_trace_ == nullptr) {
+      LOG(ERROR) << "Trace pause requested, but no trace currently running";
+      return;
+    } else {
+      the_trace = the_trace_;
+      sampling_pthread = sampling_pthread_;
+    }
+  }
+
+  if (sampling_pthread != 0U) {
+    {
+      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      the_trace_ = nullptr;
+    }
+    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
+    sampling_pthread_ = 0U;
+    {
+      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      the_trace_ = the_trace;
+    }
+  }
+
+  if (the_trace != nullptr) {
+    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+
+    if (the_trace->trace_mode_ == TraceMode::kSampling) {
+      MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+      runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
+    } else {
+      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->RemoveListener(the_trace,
+                                                    instrumentation::Instrumentation::kMethodEntered |
+                                                    instrumentation::Instrumentation::kMethodExited |
+                                                    instrumentation::Instrumentation::kMethodUnwind);
+    }
+    runtime->GetThreadList()->ResumeAll();
+  }
+
+  if (stop_alloc_counting) {
+    // Can be racy since SetStatsEnabled is not guarded by any locks.
+    Runtime::Current()->SetStatsEnabled(false);
+  }
+}
+
+void Trace::Resume() {
+  Thread* self = Thread::Current();
+  Trace* the_trace;
+  {
+    MutexLock mu(self, *Locks::trace_lock_);
+    if (the_trace_ == nullptr) {
+      LOG(ERROR) << "No trace to resume (or sampling mode), ignoring this request";
+      return;
+    }
+    the_trace = the_trace_;
+  }
+
+  Runtime* runtime = Runtime::Current();
+
+  // Enable count of allocs if specified in the flags.
+  bool enable_stats = (the_trace->flags_ & kTraceCountAllocs) != 0;
+
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+
+  // Reenable.
+  if (the_trace->trace_mode_ == TraceMode::kSampling) {
+    CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
+        reinterpret_cast<void*>(the_trace->interval_us_)), "Sampling profiler thread");
+  } else {
+    runtime->GetInstrumentation()->AddListener(the_trace,
+                                               instrumentation::Instrumentation::kMethodEntered |
+                                               instrumentation::Instrumentation::kMethodExited |
+                                               instrumentation::Instrumentation::kMethodUnwind);
+    // TODO: In full-PIC mode, we don't need to fully deopt.
+    runtime->GetInstrumentation()->EnableMethodTracing();
+  }
+
+  runtime->GetThreadList()->ResumeAll();
+
+  // Can't call this when holding the mutator lock.
+  if (enable_stats) {
+    runtime->SetStatsEnabled(true);
+  }
+}
+
 TracingMode Trace::GetMethodTracingMode() {
   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
-  if (the_trace_ == NULL) {
+  if (the_trace_ == nullptr) {
     return kTracingInactive;
   } else {
     switch (the_trace_->trace_mode_) {
@@ -476,13 +592,26 @@
   }
 }
 
-Trace::Trace(File* trace_file, int buffer_size, int flags, TraceMode trace_mode)
-    : trace_file_(trace_file), buf_(new uint8_t[buffer_size]()), flags_(flags),
-      trace_mode_(trace_mode), clock_source_(default_clock_source_),
-      buffer_size_(buffer_size), start_time_(MicroTime()),
-      clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0), overflow_(false) {
-  // Set up the beginning of the trace.
+static constexpr size_t kStreamingBufferSize = 16 * KB;
+
+Trace::Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+             TraceOutputMode output_mode, TraceMode trace_mode)
+    : trace_file_(trace_file),
+      buf_(new uint8_t[output_mode == TraceOutputMode::kStreaming ?
+          kStreamingBufferSize :
+          buffer_size]()),
+      flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode),
+      clock_source_(default_clock_source_),
+      buffer_size_(output_mode == TraceOutputMode::kStreaming ?
+          kStreamingBufferSize :
+          buffer_size),
+      start_time_(MicroTime()), clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0),
+      overflow_(false), interval_us_(0), streaming_lock_(nullptr) {
   uint16_t trace_version = GetTraceVersion(clock_source_);
+  if (output_mode == TraceOutputMode::kStreaming) {
+    trace_version |= 0xF0U;
+  }
+  // Set up the beginning of the trace.
   memset(buf_.get(), 0, kTraceHeaderLength);
   Append4LE(buf_.get(), kTraceMagicValue);
   Append2LE(buf_.get() + 4, trace_version);
@@ -495,6 +624,16 @@
 
   // Update current offset.
   cur_offset_.StoreRelaxed(kTraceHeaderLength);
+
+  if (output_mode == TraceOutputMode::kStreaming) {
+    streaming_file_name_ = trace_name;
+    streaming_lock_ = new Mutex("tracing lock");
+    seen_threads_.reset(new ThreadIDBitSet());
+  }
+}
+
+Trace::~Trace() {
+  delete streaming_lock_;
 }
 
 static void DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source)
@@ -511,14 +650,38 @@
   }
 }
 
-void Trace::FinishTracing() {
-  // Compute elapsed time.
-  uint64_t elapsed = MicroTime() - start_time_;
+static void GetVisitedMethodsFromBitSets(
+    const std::map<mirror::DexCache*, DexIndexBitSet*>& seen_methods,
+    std::set<mirror::ArtMethod*>* visited_methods) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  for (auto& e : seen_methods) {
+    DexIndexBitSet* bit_set = e.second;
+    for (uint32_t i = 0; i < bit_set->size(); ++i) {
+      if ((*bit_set)[i]) {
+        visited_methods->insert(e.first->GetResolvedMethod(i));
+      }
+    }
+  }
+}
 
-  size_t final_offset = cur_offset_.LoadRelaxed();
+void Trace::FinishTracing() {
+  size_t final_offset = 0;
 
   std::set<mirror::ArtMethod*> visited_methods;
-  GetVisitedMethods(final_offset, &visited_methods);
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    // Write the secondary file with all the method names.
+    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
+
+    // Clean up.
+    for (auto& e : seen_methods_) {
+      delete e.second;
+    }
+  } else {
+    final_offset = cur_offset_.LoadRelaxed();
+    GetVisitedMethods(final_offset, &visited_methods);
+  }
+
+  // Compute elapsed time.
+  uint64_t elapsed = MicroTime() - start_time_;
 
   std::ostringstream os;
 
@@ -535,8 +698,10 @@
     os << StringPrintf("clock=wall\n");
   }
   os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
-  size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
-  os << StringPrintf("num-method-calls=%zd\n", num_records);
+  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
+    size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
+    os << StringPrintf("num-method-calls=%zd\n", num_records);
+  }
   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns_);
   os << StringPrintf("vm=art\n");
   os << StringPrintf("pid=%d\n", getpid());
@@ -550,27 +715,44 @@
   os << StringPrintf("%cmethods\n", kTraceTokenChar);
   DumpMethodList(os, visited_methods);
   os << StringPrintf("%cend\n", kTraceTokenChar);
-
   std::string header(os.str());
-  if (trace_file_.get() == NULL) {
-    iovec iov[2];
-    iov[0].iov_base = reinterpret_cast<void*>(const_cast<char*>(header.c_str()));
-    iov[0].iov_len = header.length();
-    iov[1].iov_base = buf_.get();
-    iov[1].iov_len = final_offset;
-    Dbg::DdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
-    const bool kDumpTraceInfo = false;
-    if (kDumpTraceInfo) {
-      LOG(INFO) << "Trace sent:\n" << header;
-      DumpBuf(buf_.get(), final_offset, clock_source_);
+
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    File file;
+    if (!file.Open(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY)) {
+      LOG(WARNING) << "Could not open secondary trace file!";
+      return;
     }
-  } else {
-    if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
-        !trace_file_->WriteFully(buf_.get(), final_offset)) {
+    if (!file.WriteFully(header.c_str(), header.length())) {
+      file.Erase();
       std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
       PLOG(ERROR) << detail;
       ThrowRuntimeException("%s", detail.c_str());
     }
+    if (file.FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Could not write secondary file";
+    }
+  } else {
+    if (trace_file_.get() == nullptr) {
+      iovec iov[2];
+      iov[0].iov_base = reinterpret_cast<void*>(const_cast<char*>(header.c_str()));
+      iov[0].iov_len = header.length();
+      iov[1].iov_base = buf_.get();
+      iov[1].iov_len = final_offset;
+      Dbg::DdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
+      const bool kDumpTraceInfo = false;
+      if (kDumpTraceInfo) {
+        LOG(INFO) << "Trace sent:\n" << header;
+        DumpBuf(buf_.get(), final_offset, clock_source_);
+      }
+    } else {
+      if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
+          !trace_file_->WriteFully(buf_.get(), final_offset)) {
+        std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
+        PLOG(ERROR) << detail;
+        ThrowRuntimeException("%s", detail.c_str());
+      }
+    }
   }
 }
 
@@ -654,20 +836,76 @@
   }
 }
 
+bool Trace::RegisterMethod(mirror::ArtMethod* method) {
+  mirror::DexCache* dex_cache = method->GetDexCache();
+  if (dex_cache->GetResolvedMethod(method->GetDexMethodIndex()) != method) {
+    DCHECK(dex_cache->GetResolvedMethod(method->GetDexMethodIndex()) == nullptr);
+    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method);
+  }
+  if (seen_methods_.find(dex_cache) == seen_methods_.end()) {
+    seen_methods_.insert(std::make_pair(dex_cache, new DexIndexBitSet()));
+  }
+  DexIndexBitSet* bit_set = seen_methods_.find(dex_cache)->second;
+  if (!(*bit_set)[method->GetDexMethodIndex()]) {
+    bit_set->set(method->GetDexMethodIndex());
+    return true;
+  }
+  return false;
+}
+
+bool Trace::RegisterThread(Thread* thread) {
+  pid_t tid = thread->GetTid();
+  CHECK_LT(0U, static_cast<uint32_t>(tid));
+  CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+
+  if (!(*seen_threads_)[tid]) {
+    seen_threads_->set(tid);
+    return true;
+  }
+  return false;
+}
+
+static std::string GetMethodLine(mirror::ArtMethod* method)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
+      PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
+      method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
+}
+
+void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
+  int32_t old_offset = cur_offset_.LoadRelaxed();
+  int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
+  if (new_offset > buffer_size_) {
+    // Flush buffer.
+    if (!trace_file_->WriteFully(buf_.get(), old_offset)) {
+      PLOG(WARNING) << "Failed streaming a tracing event.";
+    }
+    old_offset = 0;
+    new_offset = static_cast<int32_t>(src_size);
+  }
+  cur_offset_.StoreRelease(new_offset);
+  // Fill in data.
+  memcpy(buf_.get() + old_offset, src, src_size);
+}
+
 void Trace::LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                                 instrumentation::Instrumentation::InstrumentationEvent event,
                                 uint32_t thread_clock_diff, uint32_t wall_clock_diff) {
   // Advance cur_offset_ atomically.
   int32_t new_offset;
-  int32_t old_offset;
-  do {
-    old_offset = cur_offset_.LoadRelaxed();
-    new_offset = old_offset + GetRecordSize(clock_source_);
-    if (new_offset > buffer_size_) {
-      overflow_ = true;
-      return;
-    }
-  } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
+  int32_t old_offset = 0;
+
+  // We do a busy loop here trying to acquire the next offset.
+  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
+    do {
+      old_offset = cur_offset_.LoadRelaxed();
+      new_offset = old_offset + GetRecordSize(clock_source_);
+      if (new_offset > buffer_size_) {
+        overflow_ = true;
+        return;
+      }
+    } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
+  }
 
   TraceAction action = kTraceMethodEnter;
   switch (event) {
@@ -687,7 +925,15 @@
   uint32_t method_value = EncodeTraceMethodAndAction(method, action);
 
   // Write data
-  uint8_t* ptr = buf_.get() + old_offset;
+  uint8_t* ptr;
+  static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
+  uint8_t stack_buf[kPacketSize];             // Space to store a packet when in streaming mode.
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    ptr = stack_buf;
+  } else {
+    ptr = buf_.get() + old_offset;
+  }
+
   Append2LE(ptr, thread->GetTid());
   Append4LE(ptr + 2, method_value);
   ptr += 6;
@@ -699,6 +945,34 @@
   if (UseWallClock()) {
     Append4LE(ptr, wall_clock_diff);
   }
+  static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect.");
+
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    MutexLock mu(Thread::Current(), *streaming_lock_);  // To serialize writing.
+    if (RegisterMethod(method)) {
+      // Write a special block with the name.
+      std::string method_line(GetMethodLine(method));
+      uint8_t buf2[5];
+      Append2LE(buf2, 0);
+      buf2[2] = kOpNewMethod;
+      Append2LE(buf2 + 3, static_cast<uint16_t>(method_line.length()));
+      WriteToBuf(buf2, sizeof(buf2));
+      WriteToBuf(reinterpret_cast<const uint8_t*>(method_line.c_str()), method_line.length());
+    }
+    if (RegisterThread(thread)) {
+      // It might be better to postpone this. Threads might not have received names...
+      std::string thread_name;
+      thread->GetThreadName(thread_name);
+      uint8_t buf2[7];
+      Append2LE(buf2, 0);
+      buf2[2] = kOpNewThread;
+      Append2LE(buf2 + 3, static_cast<uint16_t>(thread->GetTid()));
+      Append2LE(buf2 + 5, static_cast<uint16_t>(thread_name.length()));
+      WriteToBuf(buf2, sizeof(buf2));
+      WriteToBuf(reinterpret_cast<const uint8_t*>(thread_name.c_str()), thread_name.length());
+    }
+    WriteToBuf(stack_buf, sizeof(stack_buf));
+  }
 }
 
 void Trace::GetVisitedMethods(size_t buf_size,
@@ -716,9 +990,7 @@
 
 void Trace::DumpMethodList(std::ostream& os, const std::set<mirror::ArtMethod*>& visited_methods) {
   for (const auto& method : visited_methods) {
-    os << StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
-        PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
-        method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
+    os << GetMethodLine(method);
   }
 }
 
@@ -750,4 +1022,16 @@
   }
 }
 
+Trace::TraceOutputMode Trace::GetOutputMode() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace output mode requested, but no trace currently running";
+  return the_trace_->trace_output_mode_;
+}
+
+Trace::TraceMode Trace::GetMode() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
+  return the_trace_->trace_mode_;
+}
+
 }  // namespace art
diff --git a/runtime/trace.h b/runtime/trace.h
index d8bd428..b8329ff 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_TRACE_H_
 #define ART_RUNTIME_TRACE_H_
 
+#include <bitset>
+#include <map>
 #include <memory>
 #include <ostream>
 #include <set>
@@ -34,11 +36,15 @@
 
 namespace mirror {
   class ArtMethod;
+  class DexCache;
 }  // namespace mirror
 
 class ArtField;
 class Thread;
 
+using DexIndexBitSet = std::bitset<65536>;
+using ThreadIDBitSet = std::bitset<65536>;
+
 enum TracingMode {
   kTracingInactive,
   kMethodTracingActive,
@@ -53,7 +59,8 @@
 
   enum class TraceOutputMode {
     kFile,
-    kDDMS
+    kDDMS,
+    kStreaming
   };
 
   enum class TraceMode {
@@ -61,6 +68,8 @@
     kSampling
   };
 
+  ~Trace();
+
   static void SetDefaultClockSource(TraceClockSource clock_source);
 
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
@@ -69,7 +78,16 @@
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_,
                      Locks::trace_lock_);
+  static void Pause() LOCKS_EXCLUDED(Locks::trace_lock_, Locks::thread_list_lock_);
+  static void Resume() LOCKS_EXCLUDED(Locks::trace_lock_);
+
+  // Stop tracing. This will finish the trace and write it to file/send it via DDMS.
   static void Stop()
+        LOCKS_EXCLUDED(Locks::mutator_lock_,
+                       Locks::thread_list_lock_,
+                       Locks::trace_lock_);
+  // Abort tracing. This will just stop tracing and *not* write/send the collected data.
+  static void Abort()
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::trace_lock_);
@@ -116,19 +134,25 @@
   // Save id and name of a thread before it exits.
   static void StoreExitingThreadInfo(Thread* thread);
 
+  static TraceOutputMode GetOutputMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+  static TraceMode GetMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+
  private:
-  explicit Trace(File* trace_file, int buffer_size, int flags, TraceMode trace_mode);
+  Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+        TraceOutputMode output_mode, TraceMode trace_mode);
 
   // The sampling interval in microseconds is passed as an argument.
   static void* RunSamplingThread(void* arg) LOCKS_EXCLUDED(Locks::trace_lock_);
 
+  static void StopTracing(bool finish_tracing, bool flush_file);
   void FinishTracing() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff);
 
   void LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                            instrumentation::Instrumentation::InstrumentationEvent event,
-                           uint32_t thread_clock_diff, uint32_t wall_clock_diff);
+                           uint32_t thread_clock_diff, uint32_t wall_clock_diff)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Methods to output traced methods and threads.
   void GetVisitedMethods(size_t end_offset, std::set<mirror::ArtMethod*>* visited_methods);
@@ -136,6 +160,18 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DumpThreadList(std::ostream& os) LOCKS_EXCLUDED(Locks::thread_list_lock_);
 
+  // Methods to register seen entities in streaming mode. The methods return true if the entity
+  // is newly discovered.
+  bool RegisterMethod(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+  bool RegisterThread(Thread* thread)
+      EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+
+  // Copy a temporary buffer to the main buffer. Used for streaming. Exposed here for lock
+  // annotation.
+  void WriteToBuf(const uint8_t* src, size_t src_size)
+      EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+
   // Singleton instance of the Trace or NULL when no method tracing is active.
   static Trace* volatile the_trace_ GUARDED_BY(Locks::trace_lock_);
 
@@ -157,7 +193,10 @@
   // Flags enabling extra tracing of things such as alloc counts.
   const int flags_;
 
-  // True if traceview should sample instead of instrumenting method entry/exit.
+  // The kind of output for this tracing.
+  const TraceOutputMode trace_output_mode_;
+
+  // The tracing method.
   const TraceMode trace_mode_;
 
   const TraceClockSource clock_source_;
@@ -180,6 +219,15 @@
   // Map of thread ids and names that have already exited.
   SafeMap<pid_t, std::string> exited_threads_;
 
+  // Sampling profiler sampling interval.
+  int interval_us_;
+
+  // Streaming mode data.
+  std::string streaming_file_name_;
+  Mutex* streaming_lock_;
+  std::map<mirror::DexCache*, DexIndexBitSet*> seen_methods_;
+  std::unique_ptr<ThreadIDBitSet> seen_threads_;
+
   DISALLOW_COPY_AND_ASSIGN(Trace);
 };
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f13da8b..a303aa4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1440,32 +1440,58 @@
   }
 }
 
-std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+static std::string GetDalvikCacheImpl(const char* subdir,
+                                      const bool create_if_absent,
+                                      const bool abort_on_error) {
   CHECK(subdir != nullptr);
   const char* android_data = GetAndroidData();
   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
-  if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
+  if (!OS::DirectoryExists(dalvik_cache.c_str())) {
+    if (!create_if_absent) {
+      // TODO: Check callers. Traditional behavior is not to abort, even when abort_on_error.
+      return "";
+    }
+
     // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
-    if (strcmp(android_data, "/data") != 0) {
-      int result = mkdir(dalvik_cache_root.c_str(), 0700);
-      if (result != 0 && errno != EEXIST) {
-        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
-        return "";
+    if (strcmp(android_data, "/data") == 0) {
+      if (abort_on_error) {
+        LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
+                   << ", cannot create /data dalvik-cache.";
+        UNREACHABLE();
       }
-      result = mkdir(dalvik_cache.c_str(), 0700);
-      if (result != 0) {
+      return "";
+    }
+
+    int result = mkdir(dalvik_cache_root.c_str(), 0700);
+    if (result != 0 && errno != EEXIST) {
+      if (abort_on_error) {
+        PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
+        UNREACHABLE();
+      }
+      return "";
+    }
+
+    result = mkdir(dalvik_cache.c_str(), 0700);
+    if (result != 0) {
+      if (abort_on_error) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
-        return "";
+        UNREACHABLE();
       }
-    } else {
-      LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache;
       return "";
     }
   }
   return dalvik_cache;
 }
 
+std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
+  return GetDalvikCacheImpl(subdir, create_if_absent, false);
+}
+
+std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+  return GetDalvikCacheImpl(subdir, create_if_absent, true);
+}
+
 bool GetDalvikCacheFilename(const char* location, const char* cache_location,
                             std::string* filename, std::string* error_msg) {
   if (location[0] != '/') {
diff --git a/runtime/utils.h b/runtime/utils.h
index 6dee5fe..6708c67 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -511,6 +511,9 @@
 // Find $ANDROID_DATA, /data, or return nullptr.
 const char* GetAndroidDataSafe(std::string* error_msg);
 
+// Returns the dalvik-cache location, with subdir appended. Returns the empty string if the cache
+// could not be found (or created).
+std::string GetDalvikCache(const char* subdir, bool create_if_absent = true);
 // Returns the dalvik-cache location, or dies trying. subdir will be
 // appended to the cache location.
 std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index aa7bc64..6ccbd13 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -367,6 +367,15 @@
                GetDalvikCacheFilenameOrDie("/system/framework/boot.oat", "/foo").c_str());
 }
 
+TEST_F(UtilsTest, GetDalvikCache) {
+  EXPECT_STREQ("", GetDalvikCache("should-not-exist123", false).c_str());
+
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".", false).c_str());
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/should-not-be-there").c_str(),
+               GetDalvikCache("should-not-be-there", true).c_str());
+}
+
+
 TEST_F(UtilsTest, GetSystemImageFilename) {
   EXPECT_STREQ("/system/framework/arm/boot.art",
                GetSystemImageFilename("/system/framework/boot.art", kArm).c_str());
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index 656c791..501d79c 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -251,3 +251,27 @@
   }
 
 }
+
+// Regression for when we created and kept equivalent phis with the same type.
+// The phi used in comparison would be different than the one used for access
+// so we could not safely discard it.
+class ListElement {
+  private ListElement next;
+
+  // CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (before)
+  // CHECK:         NullCheck
+  // CHECK:         NullCheck
+
+  // CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (after)
+  // CHECK-NOT:     NullCheck
+  static boolean isShorter(ListElement x, ListElement y) {
+    ListElement xTail = x;
+    ListElement yTail = y;
+    while (yTail != null) {
+      if (xTail == null) return true;
+      xTail = xTail.next;
+      yTail = yTail.next;
+    }
+    return false;
+  }
+}
diff --git a/test/468-checker-bool-simplifier-regression/src/Main.java b/test/468-checker-bool-simplifier-regression/src/Main.java
index 65f20b3..d45f3bf 100644
--- a/test/468-checker-bool-simplifier-regression/src/Main.java
+++ b/test/468-checker-bool-simplifier-regression/src/Main.java
@@ -17,7 +17,7 @@
 import java.lang.reflect.*;
 
 public class Main {
-  
+
   // CHECK-START: boolean TestCase.testCase() boolean_simplifier (before)
   // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
   // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
@@ -30,7 +30,7 @@
   // CHECK-DAG:     [[Value:z\d+]]    StaticFieldGet
   // CHECK-DAG:     [[Not:z\d+]]      BooleanNot [ [[Value]] ]
   // CHECK-DAG:                       Return [ [[Not]] ]
-  
+
   public static boolean runTest(boolean input) throws Exception {
     Class<?> c = Class.forName("TestCase");
     Method m = c.getMethod("testCase");
diff --git a/test/471-uninitialized-locals/expected.txt b/test/471-uninitialized-locals/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/471-uninitialized-locals/expected.txt
diff --git a/test/471-uninitialized-locals/info.txt b/test/471-uninitialized-locals/info.txt
new file mode 100644
index 0000000..ebead8e
--- /dev/null
+++ b/test/471-uninitialized-locals/info.txt
@@ -0,0 +1,2 @@
+Regression test for crashes in the optimizing compiler during compilation of
+methods which use values before initializing them.
diff --git a/test/471-uninitialized-locals/smali/Test.smali b/test/471-uninitialized-locals/smali/Test.smali
new file mode 100644
index 0000000..17a14bf
--- /dev/null
+++ b/test/471-uninitialized-locals/smali/Test.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static ThrowException()V
+   .registers 1
+   throw v0
+.end method
diff --git a/test/471-uninitialized-locals/src/Main.java b/test/471-uninitialized-locals/src/Main.java
new file mode 100644
index 0000000..a5b1c48
--- /dev/null
+++ b/test/471-uninitialized-locals/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String args[]) throws Exception {
+    try {
+      Class<?> c = Class.forName("Test");
+      Method m = c.getMethod("ThrowException", (Class[]) null);
+      m.invoke(null, (Object[]) null);
+    } catch (VerifyError e) {
+       // Compilation should go fine but we expect the runtime verification to fail.
+      return;
+    }
+
+    throw new Error("Failed to preset verification error!");
+  }
+
+}
diff --git a/test/473-checker-inliner-constants/expected.txt b/test/473-checker-inliner-constants/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/473-checker-inliner-constants/expected.txt
diff --git a/test/473-checker-inliner-constants/info.txt b/test/473-checker-inliner-constants/info.txt
new file mode 100644
index 0000000..e8e1ffb
--- /dev/null
+++ b/test/473-checker-inliner-constants/info.txt
@@ -0,0 +1 @@
+Test whether inliner deduplicates constants.
\ No newline at end of file
diff --git a/test/473-checker-inliner-constants/src/Main.java b/test/473-checker-inliner-constants/src/Main.java
new file mode 100644
index 0000000..79d89b0
--- /dev/null
+++ b/test/473-checker-inliner-constants/src/Main.java
@@ -0,0 +1,76 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // CHECK-START: java.lang.Object Main.InlineNullConstant() inliner (before)
+  // CHECK:         NullConstant
+  // CHECK-NOT:     NullConstant
+
+  // CHECK-START: java.lang.Object Main.InlineNullConstant() inliner (after)
+  // CHECK:         NullConstant
+  // CHECK-NOT:     NullConstant
+
+  public static Object returnNullConstant(Object x) {
+    return null;
+  }
+
+  public static Object InlineNullConstant() {
+    return returnNullConstant(null);
+  }
+
+  // CHECK-START: int Main.InlineIntConstant() inliner (before)
+  // CHECK:         IntConstant 42
+  // CHECK-NOT:     IntConstant 42
+
+  // CHECK-START: int Main.InlineIntConstant() inliner (after)
+  // CHECK:         IntConstant 42
+  // CHECK-NOT:     IntConstant 42
+
+  public static int returnIntConstant(int x) {
+    return 42;
+  }
+
+  public static int InlineIntConstant() {
+    return returnIntConstant(42);
+  }
+
+  // CHECK-START: long Main.InlineLongConstant() inliner (before)
+  // CHECK:         LongConstant 42
+  // CHECK-NOT:     LongConstant 42
+
+  // CHECK-START: long Main.InlineLongConstant() inliner (after)
+  // CHECK:         LongConstant 42
+  // CHECK-NOT:     LongConstant 42
+
+  public static long returnLongConstant(long x) {
+    return 42L;
+  }
+
+  public static long InlineLongConstant() {
+    return returnLongConstant(42L);
+  }
+
+  public static void main(String[] args) {
+    if (InlineNullConstant() != null) {
+      throw new Error("Expected null");
+    } else if (InlineIntConstant() != 42) {
+      throw new Error("Expected int 42");
+    } else if (InlineLongConstant() != 42L) {
+      throw new Error("Expected long 42");
+    }
+  }
+}
diff --git a/test/473-remove-dead-block/expected.txt b/test/473-remove-dead-block/expected.txt
new file mode 100644
index 0000000..c09201e
--- /dev/null
+++ b/test/473-remove-dead-block/expected.txt
@@ -0,0 +1 @@
+123368133
diff --git a/test/473-remove-dead-block/info.txt b/test/473-remove-dead-block/info.txt
new file mode 100644
index 0000000..81de4e6
--- /dev/null
+++ b/test/473-remove-dead-block/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler's dead block removal:
+removing a block from its predecessors also requires removing its
+successors; otherwise the CFG is left in an unexpected shape, crashing the compiler.
diff --git a/test/473-remove-dead-block/src/Main.java b/test/473-remove-dead-block/src/Main.java
new file mode 100644
index 0000000..cca2976
--- /dev/null
+++ b/test/473-remove-dead-block/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println(test(false, 5));
+  }
+
+  public static int test(boolean b, int i1) {
+    int j=4;
+    int s1=26294;
+
+    for (int i = 25; i > 1; --i) {
+      if (b) continue;
+      // javac/dx will remove the catch information, but
+      // keep the catch code around. The optimizing compiler
+      // used to crash in the presence of dead blocks like the
+      // code in catch.
+      try {
+        i1 = i1 * 26295 + (s1 / 26295);
+      } catch (Throwable exc2) {
+        for (j = 1; j < 39; ++j) {
+          j++;
+        }
+      }
+    }
+    return i1;
+  }
+}
diff --git a/test/474-checker-boolean-input/expected.txt b/test/474-checker-boolean-input/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/474-checker-boolean-input/expected.txt
diff --git a/test/474-checker-boolean-input/info.txt b/test/474-checker-boolean-input/info.txt
new file mode 100644
index 0000000..8ec946b
--- /dev/null
+++ b/test/474-checker-boolean-input/info.txt
@@ -0,0 +1 @@
+Tests if zero/one constants and integer Phis are accepted as boolean values.
\ No newline at end of file
diff --git a/test/474-checker-boolean-input/src/Main.java b/test/474-checker-boolean-input/src/Main.java
new file mode 100644
index 0000000..91e8d4f
--- /dev/null
+++ b/test/474-checker-boolean-input/src/Main.java
@@ -0,0 +1,75 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  public static void assertBoolEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /*
+   * Test that zero/one constants are accepted as boolean inputs.
+   */
+
+  // CHECK-START: boolean Main.TestIntAsBoolean() inliner (before)
+  // CHECK-DAG:     [[Invoke:z\d+]]  InvokeStaticOrDirect
+  // CHECK-DAG:                      BooleanNot [ [[Invoke]] ]
+
+  // CHECK-START: boolean Main.TestIntAsBoolean() inliner (after)
+  // CHECK-DAG:     [[Const:i\d+]]   IntConstant 1
+  // CHECK-DAG:                      BooleanNot [ [[Const]] ]
+
+  public static boolean InlineConst() {
+    return true;
+  }
+
+  public static boolean TestIntAsBoolean() {
+    return InlineConst() != true ? true : false;
+  }
+
+  /*
+   * Test that integer Phis are accepted as boolean inputs until we implement
+   * a suitable type analysis.
+   */
+
+  // CHECK-START: boolean Main.TestPhiAsBoolean(int) inliner (before)
+  // CHECK-DAG:     [[Invoke:z\d+]]  InvokeStaticOrDirect
+  // CHECK-DAG:                      BooleanNot [ [[Invoke]] ]
+
+  // CHECK-START: boolean Main.TestPhiAsBoolean(int) inliner (after)
+  // CHECK-DAG:     [[Phi:i\d+]]     Phi
+  // CHECK-DAG:                      BooleanNot [ [[Phi]] ]
+
+  public static boolean f1;
+  public static boolean f2;
+
+  public static boolean InlinePhi(int x) {
+    return (x == 42) ? f1 : f2;
+  }
+
+  public static boolean TestPhiAsBoolean(int x) {
+    return InlinePhi(x) != true ? true : false;
+  }
+
+  public static void main(String[] args) {
+    f1 = true;
+    f2 = false;
+    assertBoolEquals(true, TestPhiAsBoolean(0));
+    assertBoolEquals(false, TestPhiAsBoolean(42));
+  }
+}
diff --git a/test/475-regression-inliner-ids/expected.txt b/test/475-regression-inliner-ids/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/475-regression-inliner-ids/expected.txt
diff --git a/test/475-regression-inliner-ids/info.txt b/test/475-regression-inliner-ids/info.txt
new file mode 100644
index 0000000..d12d32e
--- /dev/null
+++ b/test/475-regression-inliner-ids/info.txt
@@ -0,0 +1,2 @@
+Tests a regression where inlining a method with constants would lead to duplicate
+instruction IDs in the caller graph.
\ No newline at end of file
diff --git a/test/475-regression-inliner-ids/smali/TestCase.smali b/test/475-regression-inliner-ids/smali/TestCase.smali
new file mode 100644
index 0000000..efbe00f
--- /dev/null
+++ b/test/475-regression-inliner-ids/smali/TestCase.smali
@@ -0,0 +1,76 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.method private static flagToString(I)Ljava/lang/String;
+  .registers 2
+
+    # The bug is triggered when inlining a method with few Load/StoreLocals but
+    # many constants. The switch instruction helps with that.
+
+    sparse-switch p0, :sswitch_data_1a
+    const/4 v0, 0x0
+
+    :goto_4
+    return-object v0
+
+  :sswitch_5
+    const-string v0, "DEFAULT"
+    goto :goto_4
+
+  :sswitch_8
+    const-string v0, "FLAG_INCLUDE_NOT_IMPORTANT_VIEWS"
+    goto :goto_4
+
+  :sswitch_b
+    const-string v0, "FLAG_REQUEST_TOUCH_EXPLORATION_MODE"
+    goto :goto_4
+
+  :sswitch_e
+    const-string v0, "FLAG_REQUEST_ENHANCED_WEB_ACCESSIBILITY"
+    goto :goto_4
+
+  :sswitch_11
+    const-string v0, "FLAG_REPORT_VIEW_IDS"
+    goto :goto_4
+
+  :sswitch_14
+    const-string v0, "FLAG_REQUEST_FILTER_KEY_EVENTS"
+    goto :goto_4
+
+  :sswitch_17
+    const-string v0, "FLAG_RETRIEVE_INTERACTIVE_WINDOWS"
+    goto :goto_4
+
+  :sswitch_data_1a
+  .sparse-switch
+      0x1 -> :sswitch_5
+      0x2 -> :sswitch_8
+      0x4 -> :sswitch_b
+      0x8 -> :sswitch_e
+      0x10 -> :sswitch_11
+      0x20 -> :sswitch_14
+      0x40 -> :sswitch_17
+  .end sparse-switch
+.end method
+
+.method public static testCase(I)Ljava/lang/String;
+  .registers 2
+    invoke-static {v1}, LTestCase;->flagToString(I)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/475-regression-inliner-ids/src/Main.java b/test/475-regression-inliner-ids/src/Main.java
new file mode 100644
index 0000000..bf22062
--- /dev/null
+++ b/test/475-regression-inliner-ids/src/Main.java
@@ -0,0 +1,33 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase", int.class);
+    String actual = (String) m.invoke(null, 1);
+    String expected = "DEFAULT";
+    if (!expected.equals(actual)) {
+      throw new Error();
+    }
+  }
+}
diff --git a/tools/stream-trace-converter.py b/tools/stream-trace-converter.py
new file mode 100755
index 0000000..951b05b
--- /dev/null
+++ b/tools/stream-trace-converter.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script that parses a trace file produced in streaming mode. The file is broken up into
+   a header and body part, which, when concatenated, make up a non-streaming trace file that
+   can be used with traceview."""
+
+import sys
+
+class MyException(Exception):
+  pass
+
+class BufferUnderrun(Exception):
+  pass
+
+def ReadShortLE(f):
+  byte1 = f.read(1)
+  if not byte1:
+    raise BufferUnderrun()
+  byte2 = f.read(1)
+  if not byte2:
+    raise BufferUnderrun()
+  return ord(byte1) + (ord(byte2) << 8);
+
+def WriteShortLE(f, val):
+  bytes = [ (val & 0xFF), ((val >> 8) & 0xFF) ]
+  asbytearray = bytearray(bytes)
+  f.write(asbytearray)
+
+def ReadIntLE(f):
+  byte1 = f.read(1)
+  if not byte1:
+    raise BufferUnderrun()
+  byte2 = f.read(1)
+  if not byte2:
+    raise BufferUnderrun()
+  byte3 = f.read(1)
+  if not byte3:
+    raise BufferUnderrun()
+  byte4 = f.read(1)
+  if not byte4:
+    raise BufferUnderrun()
+  return ord(byte1) + (ord(byte2) << 8) + (ord(byte3) << 16) + (ord(byte4) << 24);
+
+def WriteIntLE(f, val):
+  bytes = [ (val & 0xFF), ((val >> 8) & 0xFF), ((val >> 16) & 0xFF), ((val >> 24) & 0xFF) ]
+  asbytearray = bytearray(bytes)
+  f.write(asbytearray)
+
+def Copy(input, output, length):
+  buf = input.read(length)
+  if len(buf) != length:
+    raise BufferUnderrun()
+  output.write(buf)
+
+class Rewriter:
+
+  def PrintHeader(self, header):
+    header.write('*version\n');
+    header.write('3\n');
+    header.write('data-file-overflow=false\n');
+    header.write('clock=dual\n');
+    header.write('vm=art\n');
+
+  def ProcessDataHeader(self, input, body):
+    magic = ReadIntLE(input)
+    if magic != 0x574f4c53:
+      raise MyException("Magic wrong")
+
+    WriteIntLE(body, magic)
+
+    version = ReadShortLE(input)
+    if (version & 0xf0) != 0xf0:
+      raise MyException("Does not seem to be a streaming trace: %d." % version)
+    version = version ^ 0xf0
+
+    if version != 3:
+      raise MyException("Only support version 3")
+
+    WriteShortLE(body, version)
+
+    # read offset
+    offsetToData = ReadShortLE(input) - 16
+    WriteShortLE(body, offsetToData + 16)
+
+    # copy startWhen
+    Copy(input, body, 8)
+
+    if version == 1:
+      self._mRecordSize = 9;
+    elif version == 2:
+      self._mRecordSize = 10;
+    else:
+      self._mRecordSize = ReadShortLE(input)
+      WriteShortLE(body, self._mRecordSize)
+      offsetToData -= 2;
+
+    # Skip over offsetToData bytes
+    Copy(input, body, offsetToData)
+
+  def ProcessMethod(self, input):
+    stringLength = ReadShortLE(input)
+    str = input.read(stringLength)
+    self._methods.append(str)
+    print 'New method: %s' % str
+
+  def ProcessThread(self, input):
+    tid = ReadShortLE(input)
+    stringLength = ReadShortLE(input)
+    str = input.read(stringLength)
+    self._threads.append('%d\t%s\n' % (tid, str))
+    print 'New thread: %d/%s' % (tid, str)
+
+  def ProcessSpecial(self, input):
+    code = ord(input.read(1))
+    if code == 1:
+      self.ProcessMethod(input)
+    elif code == 2:
+      self.ProcessThread(input)
+    else:
+      raise MyException("Unknown special!")
+
+  def Process(self, input, body):
+    try:
+      while True:
+        threadId = ReadShortLE(input)
+        if threadId == 0:
+          self.ProcessSpecial(input)
+        else:
+          # Regular package, just copy
+          WriteShortLE(body, threadId)
+          Copy(input, body, self._mRecordSize - 2)
+    except BufferUnderrun:
+      print 'Buffer underrun, file was probably truncated. Results should still be usable.'
+
+  def Finalize(self, header):
+    header.write('*threads\n')
+    for t in self._threads:
+      header.write(t)
+    header.write('*methods\n')
+    for m in self._methods:
+      header.write(m)
+    header.write('*end\n')
+
+  def ProcessFile(self, filename):
+    input = open(filename, 'rb')                     # Input file
+    header = open(filename + '.header', 'w')         # Header part
+    body = open(filename + '.body', 'wb')            # Body part
+
+    self.PrintHeader(header)
+
+    self.ProcessDataHeader(input, body)
+
+    self._methods = []
+    self._threads = []
+    self.Process(input, body)
+
+    self.Finalize(header)
+
+    input.close()
+    header.close()
+    body.close()
+
+def main():
+  Rewriter().ProcessFile(sys.argv[1])
+  header_name = sys.argv[1] + '.header'
+  body_name = sys.argv[1] + '.body'
+  print 'Results have been written to %s and %s.' % (header_name, body_name)
+  print 'Concatenate the files to get a result usable with traceview.'
+  sys.exit(0)
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file