Merge "SIMD type conversion for x86"
diff --git a/compiler/Android.bp b/compiler/Android.bp
index f5589cd..1ee2a21 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -52,6 +52,7 @@
         "optimizing/cha_guard_optimization.cc",
         "optimizing/code_generator.cc",
         "optimizing/code_generator_utils.cc",
+        "optimizing/code_sinking.cc",
         "optimizing/constant_folding.cc",
         "optimizing/dead_code_elimination.cc",
         "optimizing/escape.cc",
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8dd423f..424b850 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -861,8 +861,11 @@
 bool CodeGenerator::HasStackMapAtCurrentPc() {
   uint32_t pc = GetAssembler()->CodeSize();
   size_t count = stack_map_stream_.GetNumberOfStackMaps();
+  if (count == 0) {
+    return false;
+  }
   CodeOffset native_pc_offset = stack_map_stream_.GetStackMap(count - 1).native_pc_code_offset;
-  return (count > 0) && (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+  return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
 }
 
 void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
new file mode 100644
index 0000000..dc3d378
--- /dev/null
+++ b/compiler/optimizing/code_sinking.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_sinking.h"
+
+#include "common_dominator.h"
+#include "nodes.h"
+
+namespace art {
+
+void CodeSinking::Run() {
+  HBasicBlock* exit = graph_->GetExitBlock();
+  if (exit == nullptr) {
+    // Infinite loop, just bail.
+    return;
+  }
+  // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
+  // as an indicator of an uncommon branch.
+  for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
+    if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+      SinkCodeToUncommonBranch(exit_predecessor);
+    }
+  }
+}
+
+static bool IsInterestingInstruction(HInstruction* instruction) {
+  // Instructions from the entry graph (for example constants) are never interesting to move.
+  if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
+    return false;
+  }
+  // We want to move moveable instructions that cannot throw, as well as
+  // heap stores and allocations.
+
+  // Volatile stores cannot be moved.
+  if (instruction->IsInstanceFieldSet()) {
+    if (instruction->AsInstanceFieldSet()->IsVolatile()) {
+      return false;
+    }
+  }
+
+  // Check allocations first, as they can throw, but it is safe to move them.
+  if (instruction->IsNewInstance() || instruction->IsNewArray()) {
+    return true;
+  }
+
+  // All other instructions that can throw cannot be moved.
+  if (instruction->CanThrow()) {
+    return false;
+  }
+
+  // We can only store on local allocations. Other heap references can
+  // be escaping. Note that allocations can escape too, but we only move
+  // allocations if their users can move to, or are in the list of
+  // post dominated blocks.
+  if (instruction->IsInstanceFieldSet()) {
+    if (!instruction->InputAt(0)->IsNewInstance()) {
+      return false;
+    }
+  }
+
+  if (instruction->IsArraySet()) {
+    if (!instruction->InputAt(0)->IsNewArray()) {
+      return false;
+    }
+  }
+
+  // Heap accesses cannot go past instructions that have memory side effects, which
+  // we are not tracking here. Note that the load/store elimination optimization
+  // runs before this optimization, and should have removed interesting ones.
+  // In theory, we could handle loads of local allocations, but this is currently
+  // hard to test, as LSE removes them.
+  if (instruction->IsStaticFieldGet() ||
+      instruction->IsInstanceFieldGet() ||
+      instruction->IsArrayGet()) {
+    return false;
+  }
+
+  if (instruction->IsInstanceFieldSet() ||
+      instruction->IsArraySet() ||
+      instruction->CanBeMoved()) {
+    return true;
+  }
+  return false;
+}
+
+static void AddInstruction(HInstruction* instruction,
+                           const ArenaBitVector& processed_instructions,
+                           const ArenaBitVector& discard_blocks,
+                           ArenaVector<HInstruction*>* worklist) {
+  // Add to the work list if the instruction is not in the list of blocks
+  // to discard, hasn't been already processed and is of interest.
+  if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
+      !processed_instructions.IsBitSet(instruction->GetId()) &&
+      IsInterestingInstruction(instruction)) {
+    worklist->push_back(instruction);
+  }
+}
+
+static void AddInputs(HInstruction* instruction,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstruction* input : instruction->GetInputs()) {
+    AddInstruction(input, processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static void AddInputs(HBasicBlock* block,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static bool ShouldFilterUse(HInstruction* instruction,
+                            HInstruction* user,
+                            const ArenaBitVector& post_dominated) {
+  if (instruction->IsNewInstance()) {
+    return user->IsInstanceFieldSet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  } else if (instruction->IsNewArray()) {
+    return user->IsArraySet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  }
+  return false;
+}
+
+
+// Find the ideal position for moving `instruction`. If `filter` is true,
+// we filter out store instructions to that instruction, which are processed
+// first in the step (3) of the sinking algorithm.
+// This method is tailored to the sinking algorithm, unlike
+// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
+static HInstruction* FindIdealPosition(HInstruction* instruction,
+                                       const ArenaBitVector& post_dominated,
+                                       bool filter = false) {
+  DCHECK(!instruction->IsPhi());  // Makes no sense for Phi.
+
+  // Find the target block.
+  CommonDominator finder(/* start_block */ nullptr);
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
+      finder.Update(user->IsPhi()
+          ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
+          : user->GetBlock());
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    DCHECK(!use.GetUser()->GetHolder()->IsPhi());
+    DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
+    finder.Update(use.GetUser()->GetHolder()->GetBlock());
+  }
+  HBasicBlock* target_block = finder.Get();
+  if (target_block == nullptr) {
+    // No user we can go next to? Likely a LSE or DCE limitation.
+    return nullptr;
+  }
+
+  // Move to the first dominator not in a loop, if we can.
+  while (target_block->IsInLoop()) {
+    if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
+      break;
+    }
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position. No need to filter anymore, as we have found a
+  // target block.
+  HInstruction* insert_pos = nullptr;
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    if (use.GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = use.GetUser();
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    HInstruction* user = use.GetUser()->GetHolder();
+    if (user->GetBlock() == target_block &&
+        (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
+      insert_pos = user;
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  DCHECK(!insert_pos->IsPhi());
+  return insert_pos;
+}
+
+
+void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
+  // Local allocator to discard data structures created below at the end of
+  // this optimization.
+  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+
+  size_t number_of_instructions = graph_->GetCurrentInstructionId();
+  ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
+  ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+  ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+  ArenaBitVector instructions_that_can_move(
+      &allocator, number_of_instructions, /* expandable */ false);
+  ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
+
+  // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
+  // TODO(ngeoffray): Getting the full set of post-dominated should be done by
+  // computing the post dominator tree, but that could be too time consuming. Also,
+  // we should start the analysis from blocks dominated by an uncommon branch, but we
+  // don't profile branches yet.
+  bool found_block = false;
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (block == end_block) {
+      found_block = true;
+      post_dominated.SetBit(block->GetBlockId());
+    } else if (found_block) {
+      bool is_post_dominated = true;
+      if (block->GetSuccessors().empty()) {
+        // We currently bail for loops.
+        is_post_dominated = false;
+      } else {
+        for (HBasicBlock* successor : block->GetSuccessors()) {
+          if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+            is_post_dominated = false;
+            break;
+          }
+        }
+      }
+      if (is_post_dominated) {
+        post_dominated.SetBit(block->GetBlockId());
+      }
+    }
+  }
+
+  // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
+  // of instructions in these blocks that are not themselves in these blocks.
+  // Also find the common dominator of the found post dominated blocks, to help filtering
+  // out un-movable uses in step (2).
+  CommonDominator finder(end_block);
+  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+    if (post_dominated.IsBitSet(i)) {
+      finder.Update(graph_->GetBlocks()[i]);
+      AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
+    }
+  }
+  HBasicBlock* common_dominator = finder.Get();
+
+  // Step (2): iterate over the worklist to find sinking candidates.
+  while (!worklist.empty()) {
+    HInstruction* instruction = worklist.back();
+    if (processed_instructions.IsBitSet(instruction->GetId())) {
+      // The instruction has already been processed, continue. This happens
+      // when the instruction is the input/user of multiple instructions.
+      worklist.pop_back();
+      continue;
+    }
+    bool all_users_in_post_dominated_blocks = true;
+    bool can_move = true;
+    // Check users of the instruction.
+    for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
+          !instructions_that_can_move.IsBitSet(user->GetId())) {
+        all_users_in_post_dominated_blocks = false;
+        // If we've already processed this user, or the user cannot be moved, or
+        // is not dominating the post dominated blocks, bail.
+        // TODO(ngeoffray): The domination check is an approximation. We should
+        // instead check if the dominated blocks post dominate the user's block,
+        // but we do not have post dominance information here.
+        if (processed_instructions.IsBitSet(user->GetId()) ||
+            !IsInterestingInstruction(user) ||
+            !user->GetBlock()->Dominates(common_dominator)) {
+          can_move = false;
+          break;
+        }
+      }
+    }
+
+    // Check environment users of the instruction. Some of these users require
+    // the instruction not to move.
+    if (all_users_in_post_dominated_blocks) {
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          if (graph_->IsDebuggable() ||
+              user->IsDeoptimize() ||
+              user->CanThrowIntoCatchBlock() ||
+              (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
+            can_move = false;
+            break;
+          }
+        }
+      }
+    }
+    if (!can_move) {
+      // Instruction cannot be moved, mark it as processed and remove it from the work
+      // list.
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+    } else if (all_users_in_post_dominated_blocks) {
+      // Instruction is a candidate for being sunk. Mark it as such, remove it from the
+      // work list, and add its inputs to the work list.
+      instructions_that_can_move.SetBit(instruction->GetId());
+      move_in_order.push_back(instruction);
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+      AddInputs(instruction, processed_instructions, post_dominated, &worklist);
+      // Drop the environment uses not in the list of post-dominated blocks. This is
+      // to help step (3) of this optimization, when we start moving instructions
+      // closer to their use.
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          environment->RemoveAsUserOfInput(use.GetIndex());
+          environment->SetRawEnvAt(use.GetIndex(), nullptr);
+        }
+      }
+    } else {
+      // The information we have on the users was not enough to decide whether the
+      // instruction could be moved.
+      // Add the users to the work list, and keep the instruction in the work list
+      // to process it again once all users have been processed.
+      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+        AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
+      }
+    }
+  }
+
+  // Make sure we process instructions in dominated order. This is required for heap
+  // stores.
+  std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
+    return b->StrictlyDominates(a);
+  });
+
+  // Step (3): Try to move sinking candidates.
+  for (HInstruction* instruction : move_in_order) {
+    HInstruction* position = nullptr;
+    if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+      if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
+        // A store can trivially move, but it can safely do so only if the heap
+        // location it stores to can also move.
+        // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
+        // from the set and all their inputs.
+        continue;
+      }
+      // Find the position of the instruction we're storing into, filtering out this
+      // store and all other stores to that instruction.
+      position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+
+      // The position needs to be dominated by the store, in order for the store to move there.
+      if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
+        continue;
+      }
+    } else {
+      // Find the ideal position within the post dominated blocks.
+      position = FindIdealPosition(instruction, post_dominated);
+      if (position == nullptr) {
+        continue;
+      }
+    }
+    // Bail if we could not find a position in the post dominated blocks (for example,
+    // if there are multiple users whose common dominator is not in the list of
+    // post dominated blocks).
+    if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
+      continue;
+    }
+    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    instruction->MoveBefore(position, /* ensure_safety */ false);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
new file mode 100644
index 0000000..59cda52
--- /dev/null
+++ b/compiler/optimizing/code_sinking.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimization pass to move instructions into uncommon branches,
+ * when it is safe to do so.
+ */
+class CodeSinking : public HOptimization {
+ public:
+  CodeSinking(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kCodeSinkingPassName, stats) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kCodeSinkingPassName = "code_sinking";
+
+ private:
+  // Try to move code only used by `end_block` and all its post-dominated / dominated
+  // blocks, to these blocks.
+  void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSinking);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index b459d24..9f012cf 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -36,12 +36,16 @@
   // Create a finder starting with a given block.
   explicit CommonDominator(HBasicBlock* block)
       : dominator_(block), chain_length_(ChainLength(block)) {
-    DCHECK(block != nullptr);
   }
 
   // Update the common dominator with another block.
   void Update(HBasicBlock* block) {
     DCHECK(block != nullptr);
+    if (dominator_ == nullptr) {
+      dominator_ = block;
+      chain_length_ = ChainLength(block);
+      return;
+    }
     HBasicBlock* block2 = dominator_;
     DCHECK(block2 != nullptr);
     if (block == block2) {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f72bd6a..3842ef9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
 #include "builder.h"
 #include "cha_guard_optimization.h"
 #include "code_generator.h"
+#include "code_sinking.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "constant_folding.h"
@@ -521,6 +522,8 @@
     return new (arena) HLoopOptimization(graph, most_recent_induction);
   } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
     return new (arena) CHAGuardOptimization(graph);
+  } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
+    return new (arena) CodeSinking(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
     return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -787,6 +790,7 @@
       graph, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
+  CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -817,6 +821,7 @@
     lse,
     cha_guard,
     dce3,
+    code_sinking,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7240d40..ae9a811 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -68,6 +68,7 @@
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
   kSimplifyIf,
+  kInstructionSunk,
   kLastStat
 };
 
@@ -166,6 +167,7 @@
       case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
       case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
       case kSimplifyIf: name = "SimplifyIf"; break;
+      case kInstructionSunk: name = "InstructionSunk"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 609068f..131f4b9 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -653,7 +653,7 @@
       if (has_catch_all) {
         size = -size;
       }
-      if (already_added == true)  {
+      if (already_added) {
         for (int32_t i = 0; i < size; i++) {
           DecodeUnsignedLeb128(&handlers_data);
           DecodeUnsignedLeb128(&handlers_data);
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 1add6bf..22619b9 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -46,6 +46,8 @@
 
 using android::base::StringPrintf;
 
+static constexpr uint32_t kDexCodeItemAlignment = 4;
+
 /*
  * Flags for use with createAccessFlagStr().
  */
@@ -1489,7 +1491,7 @@
   }
 }
 
-std::vector<dex_ir::ClassDef*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
+std::vector<dex_ir::ClassData*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
   std::vector<dex_ir::ClassDef*> new_class_def_order;
   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
@@ -1505,46 +1507,93 @@
   }
   uint32_t class_defs_offset = header_->GetCollections().ClassDefsOffset();
   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
+  std::unordered_set<dex_ir::ClassData*> visited_class_data;
+  std::vector<dex_ir::ClassData*> new_class_data_order;
   for (uint32_t i = 0; i < new_class_def_order.size(); ++i) {
     dex_ir::ClassDef* class_def = new_class_def_order[i];
     class_def->SetIndex(i);
     class_def->SetOffset(class_defs_offset);
     class_defs_offset += dex_ir::ClassDef::ItemSize();
-    if (class_def->GetClassData() != nullptr) {
-      class_def->GetClassData()->SetOffset(class_data_offset);
-      class_data_offset += class_def->GetClassData()->GetSize();
+    dex_ir::ClassData* class_data = class_def->GetClassData();
+    if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
+      class_data->SetOffset(class_data_offset);
+      class_data_offset += class_data->GetSize();
+      visited_class_data.insert(class_data);
+      new_class_data_order.push_back(class_data);
     }
   }
-  return new_class_def_order;
+  return new_class_data_order;
 }
 
-int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order) {
-  int32_t diff = 0;
+// Orders code items according to specified class data ordering.
+// NOTE: If the section following the code items is byte aligned, the last code item is left in
+// place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
+int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order) {
+  // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
+  std::unordered_set<dex_ir::CodeItem*> visited_code_items;
   uint32_t offset = header_->GetCollections().CodeItemsOffset();
-  for (dex_ir::ClassDef* class_def : new_class_def_order) {
-    dex_ir::ClassData* class_data = class_def->GetClassData();
-    if (class_data != nullptr) {
-      class_data->SetOffset(class_data->GetOffset() + diff);
-      for (auto& method : *class_data->DirectMethods()) {
-        dex_ir::CodeItem* code_item = method->GetCodeItem();
-        if (code_item != nullptr) {
-          diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-          code_item->SetOffset(offset);
-          offset += RoundUp(code_item->GetSize(), 4);
-        }
+  bool is_code_item_aligned = IsNextSectionCodeItemAligned(offset);
+  if (!is_code_item_aligned) {
+    dex_ir::CodeItem* last_code_item = nullptr;
+    for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
+      std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
+      if (last_code_item == nullptr || last_code_item->GetOffset() < code_item->GetOffset()) {
+        last_code_item = code_item.get();
       }
-      for (auto& method : *class_data->VirtualMethods()) {
-        dex_ir::CodeItem* code_item = method->GetCodeItem();
-        if (code_item != nullptr) {
-          diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-          code_item->SetOffset(offset);
-          offset += RoundUp(code_item->GetSize(), 4);
-        }
+    }
+    // Preserve the last code item by marking it already visited.
+    visited_code_items.insert(last_code_item);
+  }
+
+  int32_t diff = 0;
+  for (dex_ir::ClassData* class_data : new_class_data_order) {
+    class_data->SetOffset(class_data->GetOffset() + diff);
+    for (auto& method : *class_data->DirectMethods()) {
+      dex_ir::CodeItem* code_item = method->GetCodeItem();
+      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
+        visited_code_items.insert(code_item);
+        diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(offset);
+        offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+      }
+    }
+    for (auto& method : *class_data->VirtualMethods()) {
+      dex_ir::CodeItem* code_item = method->GetCodeItem();
+      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
+        visited_code_items.insert(code_item);
+        diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(offset);
+        offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
       }
     }
   }
+  // Adjust diff to be 4-byte aligned.
+  return RoundUp(diff, kDexCodeItemAlignment);
+}
 
-  return diff;
+bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
+  dex_ir::Collections& collections = header_->GetCollections();
+  std::set<uint32_t> section_offsets;
+  section_offsets.insert(collections.MapListOffset());
+  section_offsets.insert(collections.TypeListsOffset());
+  section_offsets.insert(collections.AnnotationSetRefListsOffset());
+  section_offsets.insert(collections.AnnotationSetItemsOffset());
+  section_offsets.insert(collections.ClassDatasOffset());
+  section_offsets.insert(collections.CodeItemsOffset());
+  section_offsets.insert(collections.StringDatasOffset());
+  section_offsets.insert(collections.DebugInfoItemsOffset());
+  section_offsets.insert(collections.AnnotationItemsOffset());
+  section_offsets.insert(collections.EncodedArrayItemsOffset());
+  section_offsets.insert(collections.AnnotationsDirectoryItemsOffset());
+
+  auto found = section_offsets.find(offset);
+  if (found != section_offsets.end()) {
+    found++;
+    if (found != section_offsets.end()) {
+      return *found % kDexCodeItemAlignment == 0;
+    }
+  }
+  return false;
 }
 
 // Adjust offsets of every item in the specified section by diff bytes.
@@ -1626,10 +1675,8 @@
 }
 
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
-  std::vector<dex_ir::ClassDef*> new_class_def_order = LayoutClassDefsAndClassData(dex_file);
-  int32_t diff = LayoutCodeItems(new_class_def_order);
-  // Adjust diff to be 4-byte aligned.
-  diff = RoundUp(diff, 4);
+  std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
+  int32_t diff = LayoutCodeItems(new_class_data_order);
   // Move sections after ClassData by diff bytes.
   FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
   // Update file size.
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index ac1a4a6..3918706 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -105,8 +105,9 @@
   void DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init);
   void DumpDexFile();
 
-  std::vector<dex_ir::ClassDef*> LayoutClassDefsAndClassData(const DexFile* dex_file);
-  int32_t LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order);
+  std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
+  int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
   void FixupSections(uint32_t offset, uint32_t diff);
 
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 9881e28..9f0593a 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -55,6 +55,26 @@
     "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
+// Dex file with catch handler unreferenced by try blocks.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferencedCatchHandlerInputDex[] =
+    "ZGV4CjAzNQD+exd52Y0f9nY5x5GmInXq5nXrO6Kl2RV4AwAAcAAAAHhWNBIAAAAAAAAAANgCAAAS"
+    "AAAAcAAAAAgAAAC4AAAAAwAAANgAAAABAAAA/AAAAAQAAAAEAQAAAQAAACQBAAA0AgAARAEAANYB"
+    "AADeAQAA5gEAAO4BAAAAAgAADwIAACYCAAA9AgAAUQIAAGUCAAB5AgAAfwIAAIUCAACIAgAAjAIA"
+    "AKECAACnAgAArAIAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAwAAAAOAAAADAAAAAYAAAAAAAAA"
+    "DQAAAAYAAADIAQAADQAAAAYAAADQAQAABQABABAAAAAAAAAAAAAAAAAAAgAPAAAAAQABABEAAAAD"
+    "AAAAAAAAAAAAAAABAAAAAwAAAAAAAAADAAAAAAAAAMgCAAAAAAAAAQABAAEAAAC1AgAABAAAAHAQ"
+    "AwAAAA4AAwABAAIAAgC6AgAAIQAAAGIAAAAaAQoAbiACABAAYgAAABoBCwBuIAIAEAAOAA0AYgAA"
+    "ABoBAQBuIAIAEAAo8A0AYgAAABoBAgBuIAIAEAAo7gAAAAAAAAcAAQAHAAAABwABAAIBAg8BAhgA"
+    "AQAAAAQAAAABAAAABwAGPGluaXQ+AAZDYXRjaDEABkNhdGNoMgAQSGFuZGxlclRlc3QuamF2YQAN"
+    "TEhhbmRsZXJUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABVMamF2YS9sYW5nL0V4Y2VwdGlv"
+    "bjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9sYW5nL1N5"
+    "c3RlbTsABFRyeTEABFRyeTIAAVYAAlZMABNbTGphdmEvbGFuZy9TdHJpbmc7AARtYWluAANvdXQA"
+    "B3ByaW50bG4AAQAHDgAEAQAHDn17AncdHoseAAAAAgAAgYAExAIBCdwCAAANAAAAAAAAAAEAAAAA"
+    "AAAAAQAAABIAAABwAAAAAgAAAAgAAAC4AAAAAwAAAAMAAADYAAAABAAAAAEAAAD8AAAABQAAAAQA"
+    "AAAEAQAABgAAAAEAAAAkAQAAASAAAAIAAABEAQAAARAAAAIAAADIAQAAAiAAABIAAADWAQAAAyAA"
+    "AAIAAAC1AgAAACAAAAEAAADIAgAAABAAAAEAAADYAgAA";
+
 // Dex file with multiple code items that have the same debug_info_off_. Constructed by a modified
 // dexlayout on XandY.
 static const char kDexFileDuplicateOffset[] =
@@ -100,25 +120,30 @@
     "ASAAAAIAAACEAQAABiAAAAIAAACwAQAAARAAAAIAAADYAQAAAiAAABIAAADoAQAAAyAAAAIAAADw"
     "AgAABCAAAAIAAAD8AgAAACAAAAIAAAAIAwAAABAAAAEAAAAgAwAA";
 
-// Dex file with catch handler unreferenced by try blocks.
-// Constructed by building a dex file with try/catch blocks and hex editing.
-static const char kUnreferencedCatchHandlerInputDex[] =
-    "ZGV4CjAzNQD+exd52Y0f9nY5x5GmInXq5nXrO6Kl2RV4AwAAcAAAAHhWNBIAAAAAAAAAANgCAAAS"
-    "AAAAcAAAAAgAAAC4AAAAAwAAANgAAAABAAAA/AAAAAQAAAAEAQAAAQAAACQBAAA0AgAARAEAANYB"
-    "AADeAQAA5gEAAO4BAAAAAgAADwIAACYCAAA9AgAAUQIAAGUCAAB5AgAAfwIAAIUCAACIAgAAjAIA"
-    "AKECAACnAgAArAIAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAwAAAAOAAAADAAAAAYAAAAAAAAA"
-    "DQAAAAYAAADIAQAADQAAAAYAAADQAQAABQABABAAAAAAAAAAAAAAAAAAAgAPAAAAAQABABEAAAAD"
-    "AAAAAAAAAAAAAAABAAAAAwAAAAAAAAADAAAAAAAAAMgCAAAAAAAAAQABAAEAAAC1AgAABAAAAHAQ"
-    "AwAAAA4AAwABAAIAAgC6AgAAIQAAAGIAAAAaAQoAbiACABAAYgAAABoBCwBuIAIAEAAOAA0AYgAA"
-    "ABoBAQBuIAIAEAAo8A0AYgAAABoBAgBuIAIAEAAo7gAAAAAAAAcAAQAHAAAABwABAAIBAg8BAhgA"
-    "AQAAAAQAAAABAAAABwAGPGluaXQ+AAZDYXRjaDEABkNhdGNoMgAQSGFuZGxlclRlc3QuamF2YQAN"
-    "TEhhbmRsZXJUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABVMamF2YS9sYW5nL0V4Y2VwdGlv"
-    "bjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9sYW5nL1N5"
-    "c3RlbTsABFRyeTEABFRyeTIAAVYAAlZMABNbTGphdmEvbGFuZy9TdHJpbmc7AARtYWluAANvdXQA"
-    "B3ByaW50bG4AAQAHDgAEAQAHDn17AncdHoseAAAAAgAAgYAExAIBCdwCAAANAAAAAAAAAAEAAAAA"
-    "AAAAAQAAABIAAABwAAAAAgAAAAgAAAC4AAAAAwAAAAMAAADYAAAABAAAAAEAAAD8AAAABQAAAAQA"
-    "AAAEAQAABgAAAAEAAAAkAQAAASAAAAIAAABEAQAAARAAAAIAAADIAQAAAiAAABIAAADWAQAAAyAA"
-    "AAIAAAC1AgAAACAAAAEAAADIAgAAABAAAAEAAADYAgAA";
+// Dex file with shared empty class data item for multiple class defs.
+// Constructed by building a dex file with multiple classes and hex editing.
+static const char kMultiClassDataInputDex[] =
+    "ZGV4CjAzNQALJgF9TtnLq748xVe/+wyxETrT9lTEiW6YAQAAcAAAAHhWNBIAAAAAAAAAADQBAAAI"
+    "AAAAcAAAAAQAAACQAAAAAAAAAAAAAAACAAAAoAAAAAAAAAAAAAAAAgAAALAAAACoAAAA8AAAAPAA"
+    "AAD4AAAAAAEAAAMBAAAIAQAADQEAACEBAAAkAQAAAgAAAAMAAAAEAAAABQAAAAEAAAAGAAAAAgAA"
+    "AAcAAAABAAAAAQYAAAMAAAAAAAAAAAAAAAAAAAAnAQAAAAAAAAIAAAABBgAAAwAAAAAAAAABAAAA"
+    "AAAAACcBAAAAAAAABkEuamF2YQAGQi5qYXZhAAFJAANMQTsAA0xCOwASTGphdmEvbGFuZy9PYmpl"
+    "Y3Q7AAFhAAFiAAAAAAABAAAAARkAAAAIAAAAAAAAAAEAAAAAAAAAAQAAAAgAAABwAAAAAgAAAAQA"
+    "AACQAAAABAAAAAIAAACgAAAABgAAAAIAAACwAAAAAiAAAAgAAADwAAAAACAAAAIAAAAnAQAAABAA"
+    "AAEAAAA0AQAA";
+
+// Dex file with code info followed by non 4-byte aligned section.
+// Constructed by building a dex file with code info followed by string data and hex editing.
+static const char kUnalignedCodeInfoInputDex[] =
+    "ZGV4CjAzNQDXJzXNb4iWn2SLhmLydW/8h1K9moERIw7UAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAG"
+    "AAAAcAAAAAMAAACIAAAAAQAAAJQAAAAAAAAAAAAAAAMAAACgAAAAAQAAALgAAAD8AAAA2AAAAAIB"
+    "AAAKAQAAEgEAABcBAAArAQAALgEAAAIAAAADAAAABAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAA"
+    "AAUAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAABAAAAAAAAADsBAAAAAAAAAQABAAEAAAAxAQAA"
+    "BAAAAHAQAgAAAA4AAQABAAAAAAA2AQAAAQAAAA4ABjxpbml0PgAGQS5qYXZhAANMQTsAEkxqYXZh"
+    "L2xhbmcvT2JqZWN0OwABVgABYQABAAcOAAMABw4AAAABAQCBgATYAQEB8AEAAAALAAAAAAAAAAEA"
+    "AAAAAAAAAQAAAAYAAABwAAAAAgAAAAMAAACIAAAAAwAAAAEAAACUAAAABQAAAAMAAACgAAAABgAA"
+    "AAEAAAC4AAAAASAAAAIAAADYAAAAAiAAAAYAAAACAQAAAyAAAAIAAAAxAQAAACAAAAEAAAA7AQAA"
+    "ABAAAAEAAABMAQAA";
 
 static void WriteBase64ToFile(const char* base64, File* file) {
   // Decode base64.
@@ -314,6 +339,12 @@
   ASSERT_TRUE(DexFileLayoutExec(&error_msg)) << error_msg;
 }
 
+TEST_F(DexLayoutTest, UnreferencedCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg)) << error_msg;
+}
 TEST_F(DexLayoutTest, DuplicateOffset) {
   ScratchFile temp;
   WriteBase64ToFile(kDexFileDuplicateOffset, temp.GetFile());
@@ -351,11 +382,40 @@
   }
 }
 
-TEST_F(DexLayoutTest, UnreferencedCatchHandler) {
-  // Disable test on target.
-  TEST_DISABLED_FOR_TARGET();
+TEST_F(DexLayoutTest, MultiClassData) {
+  ScratchFile temp;
+  WriteBase64ToFile(kMultiClassDataInputDex, temp.GetFile());
+  ScratchFile temp2;
+  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
   std::string error_msg;
-  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg)) << error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
+TEST_F(DexLayoutTest, UnalignedCodeInfo) {
+  ScratchFile temp;
+  WriteBase64ToFile(kUnalignedCodeInfoInputDex, temp.GetFile());
+  ScratchFile temp2;
+  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
 }
 
 }  // namespace art
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 21cdede..e5722a1 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -139,7 +139,7 @@
     EXPECT_FALSE(JavaLangObject->IsFinal());
     EXPECT_FALSE(JavaLangObject->IsPrimitive());
     EXPECT_FALSE(JavaLangObject->IsSynthetic());
-    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(4U, JavaLangObject->NumDirectMethods());
     EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
     if (!kUseBrooksReadBarrier) {
       EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 53be30e..37963e4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -3961,7 +3961,14 @@
 
 void Heap::CheckPreconditionsForAllocObject(ObjPtr<mirror::Class> c, size_t byte_count) {
   CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
-        (c->IsVariableSize() || c->GetObjectSize() == byte_count)) << c->GetClassFlags();
+        (c->IsVariableSize() || c->GetObjectSize() == byte_count))
+      << "ClassFlags=" << c->GetClassFlags()
+      << " IsClassClass=" << c->IsClassClass()
+      << " byte_count=" << byte_count
+      << " IsVariableSize=" << c->IsVariableSize()
+      << " ObjectSize=" << c->GetObjectSize()
+      << " sizeof(Class)=" << sizeof(mirror::Class)
+      << " klass=" << c.Ptr();
   CHECK_GE(byte_count, sizeof(mirror::Object));
 }
 
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index e59c4bb..495fec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1111,7 +1111,9 @@
   if (space != nullptr) {
     if (space->IsZygoteSpace()) {
       heap_type = HPROF_HEAP_ZYGOTE;
-    } else if (space->IsImageSpace()) {
+    } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
+      // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects as
+      // HPROF_HEAP_APP. b/35762934
       heap_type = HPROF_HEAP_IMAGE;
     }
   } else {
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index 6989244..fb4f99a 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -48,12 +48,19 @@
   soa.Decode<mirror::Object>(java_this)->Wait(soa.Self(), ms, ns);
 }
 
+static jint Object_identityHashCodeNative(JNIEnv* env, jclass, jobject javaObject) {
+  ScopedFastNativeObjectAccess soa(env);
+  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
+  return static_cast<jint>(o->IdentityHashCode());
+}
+
 static JNINativeMethod gMethods[] = {
   FAST_NATIVE_METHOD(Object, internalClone, "()Ljava/lang/Object;"),
   FAST_NATIVE_METHOD(Object, notify, "()V"),
   FAST_NATIVE_METHOD(Object, notifyAll, "()V"),
   OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "()V", wait),
   OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "(JI)V", waitJI),
+  FAST_NATIVE_METHOD(Object, identityHashCodeNative, "(Ljava/lang/Object;)I"),
 };
 
 void register_java_lang_Object(JNIEnv* env) {
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index d7c9cd0..2cabce8 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -227,15 +227,6 @@
       javaDst, dstPos, count);
 }
 
-static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  if (UNLIKELY(javaObject == nullptr)) {
-    return 0;
-  }
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
-  return static_cast<jint>(o->IdentityHashCode());
-}
-
 static JNINativeMethod gMethods[] = {
   FAST_NATIVE_METHOD(System, arraycopy, "(Ljava/lang/Object;ILjava/lang/Object;II)V"),
   FAST_NATIVE_METHOD(System, arraycopyCharUnchecked, "([CI[CII)V"),
@@ -246,7 +237,6 @@
   FAST_NATIVE_METHOD(System, arraycopyFloatUnchecked, "([FI[FII)V"),
   FAST_NATIVE_METHOD(System, arraycopyDoubleUnchecked, "([DI[DII)V"),
   FAST_NATIVE_METHOD(System, arraycopyBooleanUnchecked, "([ZI[ZII)V"),
-  FAST_NATIVE_METHOD(System, identityHashCode, "(Ljava/lang/Object;)I"),
 };
 
 void register_java_lang_System(JNIEnv* env) {
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 77ca9ce..450b6b6 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -73,7 +73,6 @@
 namespace openjdkjvmti {
 
 EventHandler gEventHandler;
-ObjectTagTable gObjectTagTable(&gEventHandler);
 
 #define ENSURE_NON_NULL(n)      \
   do {                          \
@@ -334,7 +333,7 @@
                                      const jvmtiHeapCallbacks* callbacks,
                                      const void* user_data) {
     ENSURE_HAS_CAP(env, can_tag_objects);
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.FollowReferences(env,
                                       heap_filter,
                                       klass,
@@ -349,7 +348,7 @@
                                        const jvmtiHeapCallbacks* callbacks,
                                        const void* user_data) {
     ENSURE_HAS_CAP(env, can_tag_objects);
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.IterateThroughHeap(env, heap_filter, klass, callbacks, user_data);
   }
 
@@ -363,7 +362,7 @@
 
     art::ScopedObjectAccess soa(jni_env);
     art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
-    if (!gObjectTagTable.GetTag(obj.Ptr(), tag_ptr)) {
+    if (!ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->GetTag(obj.Ptr(), tag_ptr)) {
       *tag_ptr = 0;
     }
 
@@ -384,7 +383,7 @@
 
     art::ScopedObjectAccess soa(jni_env);
     art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
-    gObjectTagTable.Set(obj.Ptr(), tag);
+    ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->Set(obj.Ptr(), tag);
 
     return ERR(NONE);
   }
@@ -403,12 +402,12 @@
     }
 
     art::ScopedObjectAccess soa(jni_env);
-    return gObjectTagTable.GetTaggedObjects(env,
-                                            tag_count,
-                                            tags,
-                                            count_ptr,
-                                            object_result_ptr,
-                                            tag_result_ptr);
+    return ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->GetTaggedObjects(env,
+                                                                               tag_count,
+                                                                               tags,
+                                                                               count_ptr,
+                                                                               object_result_ptr,
+                                                                               tag_result_ptr);
   }
 
   static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
@@ -579,7 +578,7 @@
   }
 
   static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.GetLoadedClasses(env, class_count_ptr, classes_ptr);
   }
 
@@ -678,6 +677,7 @@
     ENSURE_HAS_CAP(env, can_retransform_classes);
     std::string error_msg;
     jvmtiError res = Transformer::RetransformClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
+                                                     &gEventHandler,
                                                      art::Runtime::Current(),
                                                      art::Thread::Current(),
                                                      class_count,
@@ -695,6 +695,7 @@
     ENSURE_HAS_CAP(env, can_redefine_classes);
     std::string error_msg;
     jvmtiError res = Redefiner::RedefineClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
+                                                &gEventHandler,
                                                 art::Runtime::Current(),
                                                 art::Thread::Current(),
                                                 class_count,
@@ -1162,6 +1163,8 @@
   static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
     ENSURE_VALID_ENV(env);
     gEventHandler.RemoveArtJvmTiEnv(ArtJvmTiEnv::AsArtJvmTiEnv(env));
+    art::Runtime::Current()->RemoveSystemWeakHolder(
+        ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     delete env;
     return OK;
   }
@@ -1333,13 +1336,25 @@
          version == JVMTI_VERSION;
 }
 
+extern const jvmtiInterface_1 gJvmtiInterface;
+ArtJvmTiEnv::ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler)
+    : art_vm(runtime),
+      local_data(nullptr),
+      capabilities(),
+      object_tag_table(new ObjectTagTable(event_handler)) {
+  functions = &gJvmtiInterface;
+}
+
 // Creates a jvmtiEnv and returns it with the art::ti::Env that is associated with it. new_art_ti
 // is a pointer to the uninitialized memory for an art::ti::Env.
 static void CreateArtJvmTiEnv(art::JavaVMExt* vm, /*out*/void** new_jvmtiEnv) {
-  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm);
+  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm, &gEventHandler);
   *new_jvmtiEnv = env;
 
   gEventHandler.RegisterArtJvmTiEnv(env);
+
+  art::Runtime::Current()->AddSystemWeakHolder(
+      ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
 }
 
 // A hook that the runtime uses to allow plugins to handle GetEnv calls. It returns true and
@@ -1371,7 +1386,6 @@
   SearchUtil::Register();
 
   runtime->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
-  runtime->AddSystemWeakHolder(&gObjectTagTable);
 
   return true;
 }
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 99139a1..2ff3a47 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -48,8 +48,7 @@
 
 namespace openjdkjvmti {
 
-extern const jvmtiInterface_1 gJvmtiInterface;
-extern EventHandler gEventHandler;
+class ObjectTagTable;
 
 // A structure that is a jvmtiEnv with additional information for the runtime.
 struct ArtJvmTiEnv : public jvmtiEnv {
@@ -60,10 +59,10 @@
   EventMasks event_masks;
   std::unique_ptr<jvmtiEventCallbacks> event_callbacks;
 
-  explicit ArtJvmTiEnv(art::JavaVMExt* runtime)
-      : art_vm(runtime), local_data(nullptr), capabilities() {
-    functions = &gJvmtiInterface;
-  }
+  // Tagging is specific to the jvmtiEnv.
+  std::unique_ptr<ObjectTagTable> object_tag_table;
+
+  ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler);
 
   static ArtJvmTiEnv* AsArtJvmTiEnv(jvmtiEnv* env) {
     return art::down_cast<ArtJvmTiEnv*>(env);
diff --git a/runtime/openjdkjvmti/ti_field.cc b/runtime/openjdkjvmti/ti_field.cc
index 8c3f2ff..1e5fbda 100644
--- a/runtime/openjdkjvmti/ti_field.cc
+++ b/runtime/openjdkjvmti/ti_field.cc
@@ -88,7 +88,6 @@
     *signature_ptr = signature_copy.get();
   }
 
-  // TODO: Support generic signature.
   if (generic_ptr != nullptr) {
     *generic_ptr = nullptr;
     if (!art_field->GetDeclaringClass()->IsProxyClass()) {
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index 60371cf..e494cb6 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -56,7 +56,6 @@
   }
 
   void NextRuntimePhase(RuntimePhase phase) REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
-    // TODO: Events.
     switch (phase) {
       case RuntimePhase::kInitialAgents:
         PhaseUtil::current_phase_ = JVMTI_PHASE_PRIMORDIAL;
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index 7cc7a63..c4d20c0 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -303,6 +303,7 @@
 }
 
 jvmtiError Redefiner::RedefineClasses(ArtJvmTiEnv* env,
+                                      EventHandler* event_handler,
                                       art::Runtime* runtime,
                                       art::Thread* self,
                                       jint class_count,
@@ -350,6 +351,7 @@
   }
   // Call all the transformation events.
   jvmtiError res = Transformer::RetransformClassesDirect(env,
+                                                         event_handler,
                                                          self,
                                                          &def_vector);
   if (res != OK) {
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
index 65ee291..4e6d05f 100644
--- a/runtime/openjdkjvmti/ti_redefine.h
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -88,6 +88,7 @@
   // The caller is responsible for freeing it. The runtime makes its own copy of the data.
   // TODO This function should call the transformation events.
   static jvmtiError RedefineClasses(ArtJvmTiEnv* env,
+                                    EventHandler* event_handler,
                                     art::Runtime* runtime,
                                     art::Thread* self,
                                     jint class_count,
diff --git a/runtime/openjdkjvmti/ti_search.cc b/runtime/openjdkjvmti/ti_search.cc
index df80f85..f51a98f 100644
--- a/runtime/openjdkjvmti/ti_search.cc
+++ b/runtime/openjdkjvmti/ti_search.cc
@@ -212,7 +212,6 @@
     return ERR(WRONG_PHASE);
   }
   if (current->GetClassLinker() == nullptr) {
-    // TODO: Support boot classpath change in OnLoad.
     return ERR(WRONG_PHASE);
   }
   if (segment == nullptr) {
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 2fec631..36421b9 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -63,12 +63,13 @@
 
 jvmtiError Transformer::RetransformClassesDirect(
       ArtJvmTiEnv* env,
+      EventHandler* event_handler,
       art::Thread* self,
       /*in-out*/std::vector<ArtClassDefinition>* definitions) {
   for (ArtClassDefinition& def : *definitions) {
     jint new_len = -1;
     unsigned char* new_data = nullptr;
-    gEventHandler.DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
+    event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
         self,
         GetJniEnv(env),
         def.klass,
@@ -85,6 +86,7 @@
 }
 
 jvmtiError Transformer::RetransformClasses(ArtJvmTiEnv* env,
+                                           EventHandler* event_handler,
                                            art::Runtime* runtime,
                                            art::Thread* self,
                                            jint class_count,
@@ -114,7 +116,7 @@
     }
     definitions.push_back(std::move(def));
   }
-  res = RetransformClassesDirect(env, self, &definitions);
+  res = RetransformClassesDirect(env, event_handler, self, &definitions);
   if (res != OK) {
     return res;
   }
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index 65f2ae1..c6a36e8 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -42,14 +42,20 @@
 
 namespace openjdkjvmti {
 
+class EventHandler;
+
 jvmtiError GetClassLocation(ArtJvmTiEnv* env, jclass klass, /*out*/std::string* location);
 
 class Transformer {
  public:
   static jvmtiError RetransformClassesDirect(
-      ArtJvmTiEnv* env, art::Thread* self, /*in-out*/std::vector<ArtClassDefinition>* definitions);
+      ArtJvmTiEnv* env,
+      EventHandler* event_handler,
+      art::Thread* self,
+      /*in-out*/std::vector<ArtClassDefinition>* definitions);
 
   static jvmtiError RetransformClasses(ArtJvmTiEnv* env,
+                                       EventHandler* event_handler,
                                        art::Runtime* runtime,
                                        art::Thread* self,
                                        jint class_count,
diff --git a/test/639-checker-code-sinking/expected.txt b/test/639-checker-code-sinking/expected.txt
new file mode 100644
index 0000000..52e756c
--- /dev/null
+++ b/test/639-checker-code-sinking/expected.txt
@@ -0,0 +1,3 @@
+0
+class java.lang.Object
+43
diff --git a/test/639-checker-code-sinking/info.txt b/test/639-checker-code-sinking/info.txt
new file mode 100644
index 0000000..9722bdf
--- /dev/null
+++ b/test/639-checker-code-sinking/info.txt
@@ -0,0 +1 @@
+Checker tests for the code sinking optimization pass.
diff --git a/test/639-checker-code-sinking/src/Main.java b/test/639-checker-code-sinking/src/Main.java
new file mode 100644
index 0000000..1da19b6
--- /dev/null
+++ b/test/639-checker-code-sinking/src/Main.java
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testSimpleUse();
+    testTwoUses();
+    testFieldStores(doThrow);
+    testFieldStoreCycle();
+    testArrayStores();
+    testOnlyStoreUses();
+    testNoUse();
+    testPhiInput();
+    testVolatileStore();
+    doThrow = true;
+    try {
+      testInstanceSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+    try {
+      testStaticSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+
+    try {
+      testStoreStore(doThrow);
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+  }
+
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testSimpleUse() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testTwoUses() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString() + o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testFieldStores(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  // TODO(ngeoffray): Handle allocation/store cycles.
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (after)
+  /// CHECK: begin_block
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testFieldStoreCycle() {
+    Main m1 = new Main();
+    Main m2 = new Main();
+    m1.objectField = m2;
+    m2.objectField = m1;
+    if (doThrow) {
+      throw new Error(m1.toString() + m2.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (before)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (after)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK-NOT:                  NewArray
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testArrayStores() {
+    Object[] o = new Object[1];
+    o[0] = o;
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead allocations.
+  public static void testOnlyStoreUses() {
+    Main m = new Main();
+    Object[] o = new Object[1];  // dead allocation, should eventually be removed b/35634932.
+    o[0] = m;
+    o = null;  // Avoid environment uses for the array allocation.
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead code.
+  public static void testNoUse() {
+    Main m = new Main();
+    boolean load = Main.doLoop;  // dead code, not removed because of environment use.
+    // Ensure one environment use for the static field
+    $opt$noinline$foo();
+    load = false;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure we can move code only used by a phi.
+  /// CHECK-START: void Main.testPhiInput() code_sinking (before)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testPhiInput() code_sinking (after)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testPhiInput() {
+    Object f = new Object();
+    if (doThrow) {
+      Object o = null;
+      int i = 2;
+      if (doLoop) {
+        o = f;
+        i = 42;
+      }
+      throw new Error(o.toString() + i);
+    }
+  }
+
+  static void $opt$noinline$foo() {}
+
+  // Check that we do not move volatile stores.
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testVolatileStore() {
+    Main m = new Main();
+    m.volatileField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  public static void testInstanceSideEffects() {
+    int a = mainField.intField;
+    $noinline$changeIntField();
+    if (doThrow) {
+      throw new Error("" + a);
+    }
+  }
+
+  static void $noinline$changeIntField() {
+    mainField.intField = 42;
+  }
+
+  public static void testStaticSideEffects() {
+    Object o = obj;
+    $noinline$changeStaticObjectField();
+    if (doThrow) {
+      throw new Error(o.getClass().toString());
+    }
+  }
+
+  static void $noinline$changeStaticObjectField() {
+    obj = new Main();
+  }
+
+  // Test that we preserve the order of stores.
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testStoreStore(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    m.intField = 43;
+    if (doThrow) {
+      throw new Error(m.$opt$noinline$toString());
+    }
+  }
+
+  public String $opt$noinline$toString() {
+    return "" + intField;
+  }
+
+  volatile int volatileField;
+  int intField;
+  Object objectField;
+  static boolean doThrow;
+  static boolean doLoop;
+  static Main mainField = new Main();
+  static Object obj = new Object();
+}
diff --git a/test/903-hello-tagging/expected.txt b/test/903-hello-tagging/expected.txt
index 872b79b..acfdbd8 100644
--- a/test/903-hello-tagging/expected.txt
+++ b/test/903-hello-tagging/expected.txt
@@ -8,3 +8,4 @@
 [<null;1>, <null;1>, <null;2>, <null;2>, <null;3>, <null;3>, <null;4>, <null;4>, <null;5>, <null;5>, <null;6>, <null;6>, <null;7>, <null;7>, <null;8>, <null;8>, <null;9>, <null;9>]
 18
 [<1;0>, <2;0>, <3;0>, <4;0>, <5;0>, <6;0>, <7;0>, <8;0>, <9;0>, <11;0>, <12;0>, <13;0>, <14;0>, <15;0>, <16;0>, <17;0>, <18;0>, <19;0>]
+[100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
diff --git a/test/903-hello-tagging/src/Main.java b/test/903-hello-tagging/src/Main.java
index 2f0365a..48896b2 100644
--- a/test/903-hello-tagging/src/Main.java
+++ b/test/903-hello-tagging/src/Main.java
@@ -22,6 +22,7 @@
   public static void main(String[] args) {
     doTest();
     testGetTaggedObjects();
+    testTags();
   }
 
   public static void doTest() {
@@ -35,6 +36,12 @@
     }
   }
 
+  public static void testTags() {
+    Object o = new Object();
+    long[] res = testTagsInDifferentEnvs(o, 100, 10);
+    System.out.println(Arrays.toString(res));
+  }
+
   private static WeakReference<Object> test() {
     Object o1 = new Object();
     setTag(o1, 1);
@@ -166,4 +173,5 @@
   private static native long getTag(Object o);
   private static native Object[] getTaggedObjects(long[] searchTags, boolean returnObjects,
       boolean returnTags);
+  private static native long[] testTagsInDifferentEnvs(Object o, long baseTag, int n);
 }
diff --git a/test/903-hello-tagging/tagging.cc b/test/903-hello-tagging/tagging.cc
index f74c1fc..6177263 100644
--- a/test/903-hello-tagging/tagging.cc
+++ b/test/903-hello-tagging/tagging.cc
@@ -139,6 +139,62 @@
   return resultArray;
 }
 
+static jvmtiEnv* CreateJvmtiEnv(JNIEnv* env) {
+  JavaVM* jvm;
+  CHECK_EQ(0, env->GetJavaVM(&jvm));
+
+  jvmtiEnv* new_jvmti_env;
+  CHECK_EQ(0, jvm->GetEnv(reinterpret_cast<void**>(&new_jvmti_env), JVMTI_VERSION_1_0));
+
+  jvmtiCapabilities capa;
+  memset(&capa, 0, sizeof(jvmtiCapabilities));
+  capa.can_tag_objects = 1;
+  jvmtiError error = new_jvmti_env->AddCapabilities(&capa);
+  CHECK_EQ(JVMTI_ERROR_NONE, error);
+
+  return new_jvmti_env;
+}
+
+static void SetTag(jvmtiEnv* env, jobject obj, jlong tag) {
+  jvmtiError ret = env->SetTag(obj, tag);
+  CHECK_EQ(JVMTI_ERROR_NONE, ret);
+}
+
+static jlong GetTag(jvmtiEnv* env, jobject obj) {
+  jlong tag;
+  jvmtiError ret = env->GetTag(obj, &tag);
+  CHECK_EQ(JVMTI_ERROR_NONE, ret);
+  return tag;
+}
+
+extern "C" JNIEXPORT jlongArray JNICALL Java_Main_testTagsInDifferentEnvs(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject obj, jlong base_tag, jint count) {
+  std::unique_ptr<jvmtiEnv*[]> envs = std::unique_ptr<jvmtiEnv*[]>(new jvmtiEnv*[count]);
+  envs[0] = jvmti_env;
+  for (int32_t i = 1; i != count; ++i) {
+    envs[i] = CreateJvmtiEnv(env);
+  }
+
+  for (int32_t i = 0; i != count; ++i) {
+    SetTag(envs[i], obj, base_tag + i);
+  }
+  std::unique_ptr<jlong[]> vals = std::unique_ptr<jlong[]>(new jlong[count]);
+  for (int32_t i = 0; i != count; ++i) {
+    vals[i] = GetTag(envs[i], obj);
+  }
+
+  for (int32_t i = 1; i != count; ++i) {
+    CHECK_EQ(JVMTI_ERROR_NONE, envs[i]->DisposeEnvironment());
+  }
+
+  jlongArray res = env->NewLongArray(count);
+  if (res == nullptr) {
+    return nullptr;
+  }
+  env->SetLongArrayRegion(res, 0, count, vals.get());
+  return res;
+}
+
 }  // namespace Test903HelloTagging
 }  // namespace art
 
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 1de1a69..99bc48e 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <iostream>
 #include <vector>
 
 #include "android-base/stringprintf.h"
@@ -29,6 +30,7 @@
 #include "native_stack_dump.h"
 #include "openjdkjvmti/jvmti.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
@@ -279,8 +281,14 @@
                         jlong size,
                         jint length,
                         const jvmtiHeapReferenceInfo* reference_info)
+          REQUIRES_SHARED(Locks::mutator_lock_)
           : Elem(referrer, referree, size, length) {
         memcpy(&info_, reference_info, sizeof(jvmtiHeapReferenceInfo));
+        // Debug stack trace for failure condition. Remove when done.
+        if (info_.stack_local.depth == 3 && info_.stack_local.slot == 13) {
+          DumpNativeStack(std::cerr, GetTid());
+          Thread::Current()->DumpJavaStack(std::cerr, false, false);
+        }
       }
 
      protected:
diff --git a/test/913-heaps/run b/test/913-heaps/run
index c6e62ae..dd35526 100755
--- a/test/913-heaps/run
+++ b/test/913-heaps/run
@@ -14,4 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-./default-run "$@" --jvmti
+./default-run "$@" --jvmti -Xcompiler-option -g
diff --git a/test/etc/default-build b/test/etc/default-build
index 4318966..d74b24d 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -97,7 +97,7 @@
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
-    on="$1"
+    option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
@@ -209,9 +209,9 @@
     ${JACK} --import classes.jill.jar --output-dex .
   else
     if [ ${NEED_DEX} = "true" ]; then
-      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes-ex
       zip ${TEST_NAME}-ex.jar classes.dex
-      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes
     fi
   fi
 else
diff --git a/test/run-test b/test/run-test
index e808dee..6134a14 100755
--- a/test/run-test
+++ b/test/run-test
@@ -247,6 +247,11 @@
         option="$1"
         run_args="${run_args} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--build-option" ]; then
+        shift
+        option="$1"
+        build_args="${build_args} $option"
+        shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
         option="$1"
@@ -611,6 +616,7 @@
         echo "  Runtime Options:"
         echo "    -O                    Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option     Pass an option to the compiler."
+        echo "    --build-option        Pass an option to the build script."
         echo "    --runtime-option      Pass an option to the runtime."
         echo "    --debug               Wait for a debugger to attach."
         echo "    --debuggable          Whether to compile Java code for a debugger."