Merge "(Experimental) Add Brooks pointers."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 541b8d3..67f09f9 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -24,6 +24,7 @@
 	compiler/jni/jni_compiler_test.cc \
 	compiler/leb128_encoder_test.cc \
 	compiler/oat_test.cc \
+	compiler/optimizing/dominator_test.cc \
 	compiler/optimizing/pretty_printer_test.cc \
 	compiler/output_stream_test.cc \
 	compiler/utils/arena_allocator_test.cc \
diff --git a/compiler/dex/arena_bit_vector.cc b/compiler/dex/arena_bit_vector.cc
index b567ae8..1b37b71 100644
--- a/compiler/dex/arena_bit_vector.cc
+++ b/compiler/dex/arena_bit_vector.cc
@@ -44,4 +44,14 @@
                                bool expandable, OatBitMapKind kind)
   :  BitVector(start_bits, expandable, new (arena) ArenaBitVectorAllocator(arena)), kind_(kind) {}
 
+BasicBlock* ArenaBitVector::BasicBlockIterator::Next() {
+    int idx = internal_iterator_.Next();
+
+    if (idx == -1) {
+      return nullptr;
+    }
+
+    return mir_graph_->GetBasicBlock(idx);
+}
+
 }  // namespace art
diff --git a/compiler/dex/arena_bit_vector.h b/compiler/dex/arena_bit_vector.h
index e904406..cdd5c68 100644
--- a/compiler/dex/arena_bit_vector.h
+++ b/compiler/dex/arena_bit_vector.h
@@ -20,14 +20,45 @@
 #include "base/bit_vector.h"
 #include "compiler_enums.h"
 #include "utils/arena_allocator.h"
+#include "compiler_ir.h"
 
 namespace art {
 
+// Forward declaration
+class MIRGraph;
+
 /*
  * A BitVector implementation that uses Arena allocation.
  */
 class ArenaBitVector : public BitVector {
   public:
+    /**
+     * @class BasicBlockIterator
+     * @brief Helper class to get the BasicBlocks when iterating through the ArenaBitVector.
+     */
+    class BasicBlockIterator {
+      public:
+        explicit BasicBlockIterator(ArenaBitVector* bv, MIRGraph* mir_graph)
+          : mir_graph_(mir_graph),
+            internal_iterator_(bv) {}
+
+        explicit BasicBlockIterator(ArenaBitVector* bv, CompilationUnit* c_unit)
+          : mir_graph_(c_unit->mir_graph.get()),
+            internal_iterator_(bv) {}
+
+        BasicBlock* Next();
+
+        static void* operator new(size_t size, ArenaAllocator* arena) {
+          return arena->Alloc(sizeof(ArenaBitVector::BasicBlockIterator),
+                              ArenaAllocator::kAllocGrowableArray);
+        }
+        static void operator delete(void* p) {}  // Nop.
+
+      private:
+        MIRGraph* const mir_graph_;
+        Iterator internal_iterator_;
+    };
+
     ArenaBitVector(ArenaAllocator* arena, uint32_t start_bits, bool expandable,
                    OatBitMapKind kind = kBitMapMisc);
     ~ArenaBitVector() {}
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 502df1e..0f79f41 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -248,22 +248,11 @@
   }
 
   /* Calculate DF_up */
-  ArenaBitVector::Iterator bv_iterator(bb->i_dominated);
-  while (true) {
-    // TUNING: hot call to BitVectorIteratorNext
-    int dominated_idx = bv_iterator.Next();
-    if (dominated_idx == -1) {
-      break;
-    }
-    BasicBlock* dominated_bb = GetBasicBlock(dominated_idx);
-    ArenaBitVector::Iterator df_iterator(dominated_bb->dom_frontier);
-    while (true) {
-      // TUNING: hot call to BitVectorIteratorNext
-      int df_up_idx = df_iterator.Next();
-      if (df_up_idx == -1) {
-        break;
-      }
-      BasicBlock* df_up_block = GetBasicBlock(df_up_idx);
+  ArenaBitVector::BasicBlockIterator it(bb->i_dominated, cu_);
+  for (BasicBlock* dominated_bb = it.Next(); dominated_bb != nullptr; dominated_bb = it.Next()) {
+    ArenaBitVector::BasicBlockIterator inner_it(dominated_bb->dom_frontier, cu_);
+    for (BasicBlock* df_up_block = inner_it.Next(); df_up_block != nullptr;
+         df_up_block = inner_it.Next()) {
       CheckForDominanceFrontier(bb, df_up_block);
     }
   }
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2c1091c..e6db7bc 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -22,39 +22,120 @@
 namespace art {
 
 HGraph* HGraphBuilder::BuildGraph(const uint16_t* code_ptr, const uint16_t* code_end) {
+  // Setup the graph with the entry block and exit block.
   graph_ = new (arena_) HGraph(arena_);
-
   entry_block_ = new (arena_) HBasicBlock(graph_);
   graph_->AddBlock(entry_block_);
-
+  entry_block_->AddInstruction(new (arena_) HGoto());
   exit_block_ = new (arena_) HBasicBlock(graph_);
-  // The exit block is added at the end of this method to ensure
-  // its id is the greatest. This is needed for dominator computation.
+  exit_block_->AddInstruction(new (arena_) HExit());
 
-  entry_block_->AddInstruction(new (arena_) HGoto(entry_block_));
+  // To avoid splitting blocks, we compute ahead of time the instructions that
+  // start a new block, and create these blocks.
+  ComputeBranchTargets(code_ptr, code_end);
 
-  current_block_ = new (arena_) HBasicBlock(graph_);
-  graph_->AddBlock(current_block_);
-  entry_block_->AddSuccessor(current_block_);
-
+  size_t dex_offset = 0;
   while (code_ptr < code_end) {
+    // Update the current block if dex_offset starts a new block.
+    MaybeUpdateCurrentBlock(dex_offset);
     const Instruction& instruction = *Instruction::At(code_ptr);
-    if (!AnalyzeDexInstruction(instruction)) return nullptr;
+    if (!AnalyzeDexInstruction(instruction, dex_offset)) return nullptr;
+    dex_offset += instruction.SizeInCodeUnits();
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  exit_block_->AddInstruction(new (arena_) HExit(exit_block_));
+  // Add the exit block at the end to give it the highest id.
   graph_->AddBlock(exit_block_);
   return graph_;
 }
 
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction) {
+void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
+  HBasicBlock* block = FindBlockStartingAt(index);
+  if (block == nullptr) return;
+
+  if (current_block_ != nullptr) {
+    // Branching instructions clear current_block, so we know
+    // the last instruction of the current block is not a branching
+    // instruction. We add an unconditional goto to the found block.
+    current_block_->AddInstruction(new (arena_) HGoto());
+    current_block_->AddSuccessor(block);
+  }
+  graph_->AddBlock(block);
+  current_block_ = block;
+}
+
+void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end) {
+  // TODO: Support switch instructions.
+  branch_targets_.SetSize(code_end - code_ptr);
+
+  // Create the first block for the dex instructions, single successor of the entry block.
+  HBasicBlock* block = new (arena_) HBasicBlock(graph_);
+  branch_targets_.Put(0, block);
+  entry_block_->AddSuccessor(block);
+
+  // Iterate over all instructions and find branching instructions. Create blocks for
+  // the locations these instructions branch to.
+  size_t dex_offset = 0;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    if (instruction.IsBranch()) {
+      int32_t target = instruction.GetTargetOffset() + dex_offset;
+      // Create a block for the target instruction.
+      if (FindBlockStartingAt(target) == nullptr) {
+        block = new (arena_) HBasicBlock(graph_);
+        branch_targets_.Put(target, block);
+      }
+      dex_offset += instruction.SizeInCodeUnits();
+      code_ptr += instruction.SizeInCodeUnits();
+      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) {
+        block = new (arena_) HBasicBlock(graph_);
+        branch_targets_.Put(dex_offset, block);
+      }
+    } else {
+      code_ptr += instruction.SizeInCodeUnits();
+      dex_offset += instruction.SizeInCodeUnits();
+    }
+  }
+}
+
+HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const {
+  DCHECK_GE(index, 0);
+  return branch_targets_.Get(index);
+}
+
+bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset) {
+  if (current_block_ == nullptr) return true;  // Dead code
+
   switch (instruction.Opcode()) {
     case Instruction::RETURN_VOID:
-      current_block_->AddInstruction(new (arena_) HReturnVoid(current_block_));
+      current_block_->AddInstruction(new (arena_) HReturnVoid());
       current_block_->AddSuccessor(exit_block_);
       current_block_ = nullptr;
       break;
+    case Instruction::IF_EQ: {
+      // TODO: Read the dex register.
+      HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset);
+      DCHECK(target != nullptr);
+      current_block_->AddInstruction(new (arena_) HIf());
+      current_block_->AddSuccessor(target);
+      target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+      DCHECK(target != nullptr);
+      current_block_->AddSuccessor(target);
+      current_block_ = nullptr;
+      break;
+    }
+    case Instruction::GOTO:
+    case Instruction::GOTO_16:
+    case Instruction::GOTO_32: {
+      HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset);
+      DCHECK(target != nullptr);
+      current_block_->AddInstruction(new (arena_) HGoto());
+      current_block_->AddSuccessor(target);
+      current_block_ = nullptr;
+      break;
+    }
+    case Instruction::NOP:
+      break;
     default:
       return false;
   }
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 3e94fba..fbeb7a7 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
 #include "utils/allocation.h"
+#include "utils/growable_array.h"
 
 namespace art {
 
@@ -30,6 +31,7 @@
  public:
   explicit HGraphBuilder(ArenaAllocator* arena)
       : arena_(arena),
+        branch_targets_(arena, 0),
         entry_block_(nullptr),
         exit_block_(nullptr),
         current_block_(nullptr),
@@ -41,9 +43,21 @@
   // Analyzes the dex instruction and adds HInstruction to the graph
   // to execute that instruction. Returns whether the instruction can
   // be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction);
+  bool AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset);
+
+  // Finds all instructions that start a new block, and populates branch_targets_ with
+  // the newly created blocks.
+  void ComputeBranchTargets(const uint16_t* start, const uint16_t* end);
+  void MaybeUpdateCurrentBlock(size_t index);
+  HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
   ArenaAllocator* const arena_;
+
+  // A list of the size of the dex code holding block information for
+  // the method. If an entry contains a block, then the dex instruction
+  // starting at that entry is the first instruction of a new block.
+  GrowableArray<HBasicBlock*> branch_targets_;
+
   HBasicBlock* entry_block_;
   HBasicBlock* exit_block_;
   HBasicBlock* current_block_;
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
new file mode 100644
index 0000000..30f288f
--- /dev/null
+++ b/compiler/optimizing/dominator_test.cc
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data, int length, const int* blocks, size_t blocks_length) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraphBuilder builder(&allocator);
+  HGraph* graph = builder.BuildGraph(data, data + length);
+  ASSERT_NE(graph, nullptr);
+  graph->BuildDominatorTree();
+  ASSERT_EQ(graph->blocks()->Size(), blocks_length);
+  for (size_t i = 0; i < blocks_length; i++) {
+    if (blocks[i] == -1) {
+      ASSERT_EQ(nullptr, graph->blocks()->Get(i)->dominator());
+    } else {
+      ASSERT_NE(nullptr, graph->blocks()->Get(i)->dominator());
+      ASSERT_EQ(blocks[i], graph->blocks()->Get(i)->dominator()->block_id());
+    }
+  }
+}
+
+TEST(OptimizerTest, ReturnVoid) {
+  const uint16_t data[] = {
+      Instruction::RETURN_VOID  // Block number 1
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG1) {
+  const uint16_t data[] = {
+    Instruction::GOTO | 0x100,  // Block number 1
+    Instruction::RETURN_VOID  // Block number 2
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    2
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG2) {
+  const uint16_t data[] = {
+    Instruction::GOTO | 0x100,  // Block number 1
+    Instruction::GOTO | 0x100,  // Block number 2
+    Instruction::RETURN_VOID  // Block number 3
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    2,
+    3
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG3) {
+  const uint16_t data1[] = {
+    Instruction::GOTO | 0x200,  // Block number 1
+    Instruction::RETURN_VOID,  // Block number 2
+    Instruction::GOTO | 0xFF00  // Block number 3
+  };
+  const int dominators[] = {
+    -1,
+    0,
+    3,
+    1,
+    2
+  };
+
+  TestCode(data1, sizeof(data1) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+
+  const uint16_t data2[] = {
+    Instruction::GOTO_16, 3,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO_16, 0xFFFF
+  };
+
+  TestCode(data2, sizeof(data2) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+
+  const uint16_t data3[] = {
+    Instruction::GOTO_32, 4, 0,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO_32, 0xFFFF, 0xFFFF
+  };
+
+  TestCode(data3, sizeof(data3) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG4) {
+  const uint16_t data1[] = {
+    Instruction::NOP,
+    Instruction::GOTO | 0xFF00
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    -1
+  };
+
+  TestCode(data1, sizeof(data1) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+
+  const uint16_t data2[] = {
+    Instruction::GOTO_32, 0, 0
+  };
+
+  TestCode(data2, sizeof(data2) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG5) {
+  const uint16_t data[] = {
+    Instruction::RETURN_VOID,   // Block number 1
+    Instruction::GOTO | 0x100,  // Dead block
+    Instruction::GOTO | 0xFE00  // Block number 2
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    -1,
+    1
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG6) {
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN_VOID
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    1,
+    3
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG7) {
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,  // Block number 1
+    Instruction::GOTO | 0x100,  // Block number 2
+    Instruction::GOTO | 0xFF00  // Block number 3
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    1,
+    -1  // exit block is not dominated by any block due to the spin loop.
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG8) {
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,  // Block number 1
+    Instruction::GOTO | 0x200,  // Block number 2
+    Instruction::GOTO | 0x100,  // Block number 3
+    Instruction::GOTO | 0xFF00  // Block number 4
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    1,
+    1,
+    -1  // exit block is not dominated by any block due to the spin loop.
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG9) {
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,  // Block number 1
+    Instruction::GOTO | 0x200,  // Block number 2
+    Instruction::GOTO | 0x100,  // Block number 3
+    Instruction::GOTO | 0xFE00  // Block number 4
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    1,
+    1,
+    -1  // exit block is not dominated by any block due to the spin loop.
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+TEST(OptimizerTest, CFG10) {
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 6,  // Block number 1
+    Instruction::IF_EQ, 3,  // Block number 2
+    Instruction::GOTO | 0x100,  // Block number 3
+    Instruction::GOTO | 0x100,  // Block number 4
+    Instruction::RETURN_VOID  // Block number 5
+  };
+
+  const int dominators[] = {
+    -1,
+    0,
+    1,
+    2,
+    2,
+    1,
+    5  // Block number 5 dominates exit block
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), dominators, sizeof(dominators) / sizeof(int));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e7e9f47..9ec8e89 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -24,6 +24,103 @@
   blocks_.Add(block);
 }
 
+void HGraph::FindBackEdges(ArenaBitVector* visited) const {
+  ArenaBitVector visiting(arena_, blocks_.Size(), false);
+  VisitBlockForBackEdges(GetEntryBlock(), visited, &visiting);
+}
+
+void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
+  for (size_t i = 0; i < blocks_.Size(); i++) {
+    if (!visited.IsBitSet(i)) {
+      HBasicBlock* block = blocks_.Get(i);
+      for (size_t j = 0; j < block->successors()->Size(); j++) {
+        block->successors()->Get(j)->RemovePredecessor(block);
+      }
+    }
+  }
+}
+
+void HGraph::VisitBlockForBackEdges(HBasicBlock* block,
+                                    ArenaBitVector* visited,
+                                    ArenaBitVector* visiting) const {
+  int id = block->block_id();
+  if (visited->IsBitSet(id)) return;
+
+  visited->SetBit(id);
+  visiting->SetBit(id);
+  for (size_t i = 0; i < block->successors()->Size(); i++) {
+    HBasicBlock* successor = block->successors()->Get(i);
+    if (visiting->IsBitSet(successor->block_id())) {
+      successor->AddBackEdge(block);
+    } else {
+      VisitBlockForBackEdges(successor, visited, visiting);
+    }
+  }
+  visiting->ClearBit(id);
+}
+
+void HGraph::BuildDominatorTree() {
+  ArenaBitVector visited(arena_, blocks_.Size(), false);
+
+  // (1) Find the back edges in the graph doing a DFS traversal.
+  FindBackEdges(&visited);
+
+  // (2) Remove blocks not visited during the initial DFS.
+  //     Step (3) requires dead blocks to be removed from the
+  //     predecessors list of live blocks.
+  RemoveDeadBlocks(visited);
+
+  // (3) Compute the immediate dominator of each block. We visit
+  //     the successors of a block only when all its forward branches
+  //     have been processed.
+  GrowableArray<size_t> visits(arena_, blocks_.Size());
+  visits.SetSize(blocks_.Size());
+  HBasicBlock* entry = GetEntryBlock();
+  dominator_order_.Add(entry);
+  for (size_t i = 0; i < entry->successors()->Size(); i++) {
+    VisitBlockForDominatorTree(entry->successors()->Get(i), entry, &visits);
+  }
+}
+
+HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const {
+  ArenaBitVector visited(arena_, blocks_.Size(), false);
+  // Walk the dominator tree of the first block and mark the visited blocks.
+  while (first != nullptr) {
+    visited.SetBit(first->block_id());
+    first = first->dominator();
+  }
+  // Walk the dominator tree of the second block until a marked block is found.
+  while (second != nullptr) {
+    if (visited.IsBitSet(second->block_id())) {
+      return second;
+    }
+    second = second->dominator();
+  }
+  LOG(ERROR) << "Could not find common dominator";
+  return nullptr;
+}
+
+void HGraph::VisitBlockForDominatorTree(HBasicBlock* block,
+                                        HBasicBlock* predecessor,
+                                        GrowableArray<size_t>* visits) {
+  if (block->dominator() == nullptr) {
+    block->set_dominator(predecessor);
+  } else {
+    block->set_dominator(FindCommonDominator(block->dominator(), predecessor));
+  }
+
+  visits->Increment(block->block_id());
+  // Once all the forward edges have been visited, we know the immediate
+  // dominator of the block. We can then start visiting its successors.
+  if (visits->Get(block->block_id()) ==
+      block->predecessors()->Size() - block->NumberOfBackEdges()) {
+    dominator_order_.Add(block);
+    for (size_t i = 0; i < block->successors()->Size(); i++) {
+      VisitBlockForDominatorTree(block->successors()->Get(i), block, visits);
+    }
+  }
+}
+
 void HBasicBlock::AddInstruction(HInstruction* instruction) {
   if (first_instruction_ == nullptr) {
     DCHECK(last_instruction_ == nullptr);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3d1c25e..8f6a26c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -19,6 +19,7 @@
 
 #include "utils/allocation.h"
 #include "utils/growable_array.h"
+#include "dex/arena_bit_vector.h"
 
 namespace art {
 
@@ -29,26 +30,67 @@
 static const int kDefaultNumberOfBlocks = 8;
 static const int kDefaultNumberOfSuccessors = 2;
 static const int kDefaultNumberOfPredecessors = 2;
+static const int kDefaultNumberOfBackEdges = 1;
 
 // Control-flow graph of a method. Contains a list of basic blocks.
 class HGraph : public ArenaObject {
  public:
   explicit HGraph(ArenaAllocator* arena)
       : arena_(arena),
-        blocks_(arena, kDefaultNumberOfBlocks) { }
+        blocks_(arena, kDefaultNumberOfBlocks),
+        dominator_order_(arena, kDefaultNumberOfBlocks) { }
 
   ArenaAllocator* arena() const { return arena_; }
   const GrowableArray<HBasicBlock*>* blocks() const { return &blocks_; }
 
   void AddBlock(HBasicBlock* block);
+  void BuildDominatorTree();
 
  private:
+  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
+  void VisitBlockForDominatorTree(HBasicBlock* block,
+                                  HBasicBlock* predecessor,
+                                  GrowableArray<size_t>* visits);
+  void FindBackEdges(ArenaBitVector* visited) const;
+  void VisitBlockForBackEdges(HBasicBlock* block,
+                              ArenaBitVector* visited,
+                              ArenaBitVector* visiting) const;
+  void RemoveDeadBlocks(const ArenaBitVector& visited) const;
+
+  HBasicBlock* GetEntryBlock() const { return blocks_.Get(0); }
+
   ArenaAllocator* const arena_;
+
+  // List of blocks in insertion order.
   GrowableArray<HBasicBlock*> blocks_;
 
+  // List of blocks to perform a pre-order dominator tree traversal.
+  GrowableArray<HBasicBlock*> dominator_order_;
+
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
 
+class HLoopInformation : public ArenaObject {
+ public:
+  HLoopInformation(HBasicBlock* header, HGraph* graph)
+      : header_(header),
+        back_edges_(graph->arena(), kDefaultNumberOfBackEdges) { }
+
+  void AddBackEdge(HBasicBlock* back_edge) {
+    back_edges_.Add(back_edge);
+  }
+
+  int NumberOfBackEdges() const {
+    return back_edges_.Size();
+  }
+
+ private:
+  HBasicBlock* header_;
+  GrowableArray<HBasicBlock*> back_edges_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLoopInformation);
+};
+
 // A block in a method. Contains the list of instructions represented
 // as a double linked list. Each block knows its predecessors and
 // successors.
@@ -60,6 +102,8 @@
         successors_(graph->arena(), kDefaultNumberOfSuccessors),
         first_instruction_(nullptr),
         last_instruction_(nullptr),
+        loop_information_(nullptr),
+        dominator_(nullptr),
         block_id_(-1) { }
 
   const GrowableArray<HBasicBlock*>* predecessors() const {
@@ -70,11 +114,27 @@
     return &successors_;
   }
 
+  void AddBackEdge(HBasicBlock* back_edge) {
+    if (loop_information_ == nullptr) {
+      loop_information_ = new (graph_->arena()) HLoopInformation(this, graph_);
+    }
+    loop_information_->AddBackEdge(back_edge);
+  }
+
   HGraph* graph() const { return graph_; }
 
   int block_id() const { return block_id_; }
   void set_block_id(int id) { block_id_ = id; }
 
+  HBasicBlock* dominator() const { return dominator_; }
+  void set_dominator(HBasicBlock* dominator) { dominator_ = dominator; }
+
+  int NumberOfBackEdges() const {
+    return loop_information_ == nullptr
+        ? 0
+        : loop_information_->NumberOfBackEdges();
+  }
+
   HInstruction* first_instruction() const { return first_instruction_; }
   HInstruction* last_instruction() const { return last_instruction_; }
 
@@ -83,6 +143,10 @@
     block->predecessors_.Add(this);
   }
 
+  void RemovePredecessor(HBasicBlock* block) {
+    predecessors_.Delete(block);
+  }
+
   void AddInstruction(HInstruction* instruction);
 
  private:
@@ -91,6 +155,8 @@
   GrowableArray<HBasicBlock*> successors_;
   HInstruction* first_instruction_;
   HInstruction* last_instruction_;
+  HLoopInformation* loop_information_;
+  HBasicBlock* dominator_;
   int block_id_;
 
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
@@ -99,6 +165,7 @@
 #define FOR_EACH_INSTRUCTION(M)                            \
   M(Exit)                                                  \
   M(Goto)                                                  \
+  M(If)                                                    \
   M(ReturnVoid)                                            \
 
 #define DECLARE_INSTRUCTION(type)                          \
@@ -107,9 +174,7 @@
 
 class HInstruction : public ArenaObject {
  public:
-  explicit HInstruction(HBasicBlock* block)
-      : previous_(nullptr),
-        next_(nullptr) { }
+  HInstruction() : previous_(nullptr), next_(nullptr) { }
   virtual ~HInstruction() { }
 
   HInstruction* next() const { return next_; }
@@ -199,9 +264,7 @@
 template<intptr_t N>
 class HTemplateInstruction: public HInstruction {
  public:
-  HTemplateInstruction<N>(HBasicBlock* block)
-      : HInstruction(block),
-        inputs_() { }
+  HTemplateInstruction<N>() : inputs_() { }
   virtual ~HTemplateInstruction() { }
 
   virtual intptr_t InputCount() const { return N; }
@@ -215,7 +278,7 @@
 // instruction that branches to the exit block.
 class HReturnVoid : public HTemplateInstruction<0> {
  public:
-  explicit HReturnVoid(HBasicBlock* block) : HTemplateInstruction<0>(block) { }
+  HReturnVoid() { }
 
   DECLARE_INSTRUCTION(ReturnVoid)
 
@@ -228,7 +291,7 @@
 // exit block.
 class HExit : public HTemplateInstruction<0> {
  public:
-  explicit HExit(HBasicBlock* block) : HTemplateInstruction<0>(block) { }
+  HExit() { }
 
   DECLARE_INSTRUCTION(Exit)
 
@@ -239,7 +302,7 @@
 // Jumps from one block to another.
 class HGoto : public HTemplateInstruction<0> {
  public:
-  explicit HGoto(HBasicBlock* block) : HTemplateInstruction<0>(block) { }
+  HGoto() { }
 
   DECLARE_INSTRUCTION(Goto)
 
@@ -247,6 +310,19 @@
   DISALLOW_COPY_AND_ASSIGN(HGoto);
 };
 
+// Conditional branch. A block ending with an HIf instruction must have
+// two successors.
+// TODO: Make it take an input.
+class HIf : public HTemplateInstruction<0> {
+ public:
+  HIf() { }
+
+  DECLARE_INSTRUCTION(If)
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIf);
+};
+
 class HGraphVisitor : public ValueObject {
  public:
   explicit HGraphVisitor(HGraph* graph) : graph_(graph) { }
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 62a5a2c..d4b6165 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -34,6 +34,24 @@
   virtual void VisitBasicBlock(HBasicBlock* block) {
     PrintString("BasicBlock ");
     PrintInt(block->block_id());
+    const GrowableArray<HBasicBlock*>* blocks = block->predecessors();
+    if (!blocks->IsEmpty()) {
+      PrintString(", pred: ");
+      for (size_t i = 0; i < blocks->Size() - 1; i++) {
+        PrintInt(blocks->Get(i)->block_id());
+        PrintString(", ");
+      }
+      PrintInt(blocks->Peek()->block_id());
+    }
+    blocks = block->successors();
+    if (!blocks->IsEmpty()) {
+      PrintString(", succ: ");
+      for (size_t i = 0; i < blocks->Size() - 1; i++) {
+        PrintInt(blocks->Get(i)->block_id());
+        PrintString(", ");
+      }
+      PrintInt(blocks->Peek()->block_id());
+    }
     PrintNewLine();
     HGraphVisitor::VisitBasicBlock(block);
   }
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 81a0d91..4c8201e 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -25,19 +25,10 @@
 
 namespace art {
 
-const uint16_t data[] = { Instruction::RETURN_VOID };
-
-const char* expected =
-    "BasicBlock 0\n"
-    "  Goto\n"
-    "BasicBlock 1\n"
-    "  ReturnVoid\n"
-    "BasicBlock 2\n"
-    "  Exit\n";
-
 class StringPrettyPrinter : public HPrettyPrinter {
  public:
-  explicit StringPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") { }
+  explicit StringPrettyPrinter(HGraph* graph)
+      : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
 
   virtual void PrintInt(int value) {
     str_ += StringPrintf("%d", value);
@@ -55,33 +46,213 @@
 
   std::string str() const { return str_; }
 
+  virtual void VisitBasicBlock(HBasicBlock* block) {
+    current_block_ = block;
+    HPrettyPrinter::VisitBasicBlock(block);
+  }
+
+  virtual void VisitGoto(HGoto* gota) {
+    str_ += "  Goto ";
+    PrintInt(current_block_->successors()->Get(0)->block_id());
+    PrintNewLine();
+  }
+
  private:
   std::string str_;
+  HBasicBlock* current_block_;
+
   DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
 };
 
-TEST(OptimizerTest, ReturnVoid) {
+
+static void TestCode(const uint16_t* data, int length, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraphBuilder builder(&allocator);
-  HGraph* graph = builder.BuildGraph(data, data + 1);
+  HGraph* graph = builder.BuildGraph(data, data + length);
   ASSERT_NE(graph, nullptr);
   StringPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
   ASSERT_STREQ(expected, printer.str().c_str());
-
-  const GrowableArray<HBasicBlock*>* blocks = graph->blocks();
-  ASSERT_EQ(blocks->Get(0)->predecessors()->Size(), (size_t)0);
-  ASSERT_EQ(blocks->Get(1)->predecessors()->Size(), (size_t)1);
-  ASSERT_EQ(blocks->Get(1)->predecessors()->Get(0), blocks->Get(0));
-  ASSERT_EQ(blocks->Get(2)->predecessors()->Size(), (size_t)1);
-  ASSERT_EQ(blocks->Get(2)->predecessors()->Get(0), blocks->Get(1));
-
-  ASSERT_EQ(blocks->Get(0)->successors()->Size(), (size_t)1);
-  ASSERT_EQ(blocks->Get(1)->successors()->Get(0), blocks->Get(2));
-  ASSERT_EQ(blocks->Get(1)->successors()->Size(), (size_t)1);
-  ASSERT_EQ(blocks->Get(1)->successors()->Get(0), blocks->Get(2));
-  ASSERT_EQ(blocks->Get(2)->successors()->Size(), (size_t)0);
 }
 
+TEST(PrettyPrinterTest, ReturnVoid) {
+  const uint16_t data[] = { Instruction::RETURN_VOID };
+
+  const char* expected =
+      "BasicBlock 0, succ: 1\n"
+      "  Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  ReturnVoid\n"
+      "BasicBlock 2, pred: 1\n"
+      "  Exit\n";
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG1) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  Goto 2\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  ReturnVoid\n"
+    "BasicBlock 3, pred: 2\n"
+    "  Exit\n";
+
+  const uint16_t data[] = {
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN_VOID
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG2) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 2\n"
+    "  Goto 2\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  Goto 3\n"
+    "BasicBlock 3, pred: 2, succ: 4\n"
+    "  ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  Exit\n";
+
+  const uint16_t data[] = {
+    Instruction::GOTO | 0x100,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN_VOID
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG3) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 3\n"
+    "  Goto 3\n"
+    "BasicBlock 2, pred: 3, succ: 4\n"
+    "  ReturnVoid\n"
+    "BasicBlock 3, pred: 1, succ: 2\n"
+    "  Goto 2\n"
+    "BasicBlock 4, pred: 2\n"
+    "  Exit\n";
+
+  const uint16_t data1[] = {
+    Instruction::GOTO | 0x200,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0xFF00
+  };
+
+  TestCode(data1, sizeof(data1) / sizeof(uint16_t), expected);
+
+  const uint16_t data2[] = {
+    Instruction::GOTO_16, 3,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO_16, 0xFFFF
+  };
+
+  TestCode(data2, sizeof(data2) / sizeof(uint16_t), expected);
+
+  const uint16_t data3[] = {
+    Instruction::GOTO_32, 4, 0,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO_32, 0xFFFF, 0xFFFF
+  };
+
+  TestCode(data3, sizeof(data3) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG4) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, 1, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 2\n"
+    "  Exit\n";
+
+  const uint16_t data1[] = {
+    Instruction::NOP,
+    Instruction::GOTO | 0xFF00
+  };
+
+  TestCode(data1, sizeof(data1) / sizeof(uint16_t), expected);
+
+  const uint16_t data2[] = {
+    Instruction::GOTO_32, 0, 0
+  };
+
+  TestCode(data2, sizeof(data2) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG5) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, 2, succ: 3\n"
+    "  ReturnVoid\n"
+    "BasicBlock 2, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 3, pred: 1\n"
+    "  Exit\n";
+
+  const uint16_t data[] = {
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0x100,
+    Instruction::GOTO | 0xFE00
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG6) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "  If\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  Goto 3\n"
+    "BasicBlock 3, pred: 1, 2, succ: 4\n"
+    "  ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  Exit\n";
+
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN_VOID
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
+
+TEST(PrettyPrinterTest, CFG7) {
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "  If\n"
+    "BasicBlock 2, pred: 1, 3, succ: 3\n"
+    "  Goto 3\n"
+    "BasicBlock 3, pred: 1, 2, succ: 2\n"
+    "  Goto 2\n"
+    "BasicBlock 4\n"
+    "  Exit\n";
+
+  const uint16_t data[] = {
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::GOTO | 0xFF00
+  };
+
+  TestCode(data, sizeof(data) / sizeof(uint16_t), expected);
+}
 }  // namespace art
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index b591870..82b6a60 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -161,6 +161,18 @@
 
     size_t Size() const { return num_used_; }
 
+    bool IsEmpty() const { return num_used_ == 0; }
+
+    T Pop() {
+      DCHECK_GT(num_used_, 0U);
+      return elem_list_[--num_used_];
+    }
+
+    T Peek() const {
+      DCHECK_GT(num_used_, 0U);
+      return elem_list_[num_used_ - 1];
+    }
+
     void SetSize(size_t new_size) {
       Resize(new_size);
       num_used_ = new_size;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 736dcb1..7b2bc3b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -62,30 +62,30 @@
 namespace collector {
 
 // Performance options.
-constexpr bool kUseRecursiveMark = false;
-constexpr bool kUseMarkStackPrefetch = true;
-constexpr size_t kSweepArrayChunkFreeSize = 1024;
-constexpr bool kPreCleanCards = true;
+static constexpr bool kUseRecursiveMark = false;
+static constexpr bool kUseMarkStackPrefetch = true;
+static constexpr size_t kSweepArrayChunkFreeSize = 1024;
+static constexpr bool kPreCleanCards = true;
 
 // Parallelism options.
-constexpr bool kParallelCardScan = true;
-constexpr bool kParallelRecursiveMark = true;
+static constexpr bool kParallelCardScan = true;
+static constexpr bool kParallelRecursiveMark = true;
 // Don't attempt to parallelize mark stack processing unless the mark stack is at least n
 // elements. This is temporary until we reduce the overhead caused by allocating tasks, etc.. Not
 // having this can add overhead in ProcessReferences since we may end up doing many calls of
 // ProcessMarkStack with very small mark stacks.
-constexpr size_t kMinimumParallelMarkStackSize = 128;
-constexpr bool kParallelProcessMarkStack = true;
+static constexpr size_t kMinimumParallelMarkStackSize = 128;
+static constexpr bool kParallelProcessMarkStack = true;
 
 // Profiling and information flags.
-constexpr bool kCountClassesMarked = false;
-constexpr bool kProfileLargeObjects = false;
-constexpr bool kMeasureOverhead = false;
-constexpr bool kCountTasks = false;
-constexpr bool kCountJavaLangRefs = false;
+static constexpr bool kCountClassesMarked = false;
+static constexpr bool kProfileLargeObjects = false;
+static constexpr bool kMeasureOverhead = false;
+static constexpr bool kCountTasks = false;
+static constexpr bool kCountJavaLangRefs = false;
 
 // Turn off kCheckLocks when profiling the GC since it slows the GC down by up to 40%.
-constexpr bool kCheckLocks = kDebugLocking;
+static constexpr bool kCheckLocks = kDebugLocking;
 
 void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
@@ -240,7 +240,18 @@
     CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     // Process dirty cards and add dirty cards to mod union tables, also ages cards.
     heap_->ProcessCards(timings_);
-    // Required so that we see aged cards before we start scanning the cards.
+    // The checkpoint root marking is required to avoid a race condition which occurs if the
+    // following happens during a reference write:
+    // 1. mutator dirties the card (write barrier)
+    // 2. GC ages the card (the above ProcessCards call)
+    // 3. GC scans the object (the RecursiveMarkDirtyObjects call below)
+    // 4. mutator writes the value (corresponding to the write barrier in 1.)
+    // This causes the GC to age the card but not necessarily mark the reference which the mutator
+    // wrote into the object stored in the card.
+    // Having the checkpoint fixes this issue since it ensures that the card mark and the
+    // reference write are visible to the GC before the card is scanned (this is due to locks being
+    // acquired / released in the checkpoint code).
+    // The other roots are also marked to help reduce the pause.
     MarkThreadRoots(self);
     // TODO: Only mark the dirty roots.
     MarkNonThreadRoots();
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 450445e..9e3adb4 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -53,8 +53,6 @@
   // TODO: Not put these objects in the mark stack in the first place.
   mark_stack_->Reset();
   RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
-  // Pre clean dirtied cards to reduce pauses.
-  PreCleanCards();
 }
 
 void StickyMarkSweep::Sweep(bool swap_bitmaps) {
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index cb9e2e8..40d4ea3 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -372,22 +372,12 @@
   void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
   ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory));
   self->PushShadowFrame(shadow_frame);
-  self->EndAssertNoThreadSuspension(old_cause);
 
   size_t cur_reg = num_regs - num_ins;
   if (!method->IsStatic()) {
     CHECK(receiver != NULL);
     shadow_frame->SetVRegReference(cur_reg, receiver);
     ++cur_reg;
-  } else if (UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    SirtRef<mirror::Class> sirt_c(self, method->GetDeclaringClass());
-    if (UNLIKELY(!class_linker->EnsureInitialized(sirt_c, true, true))) {
-      CHECK(self->IsExceptionPending());
-      self->PopShadowFrame();
-      return;
-    }
-    CHECK(sirt_c->IsInitializing());
   }
   const char* shorty = mh.GetShorty();
   for (size_t shorty_pos = 0, arg_pos = 0; cur_reg < num_regs; ++shorty_pos, ++arg_pos, cur_reg++) {
@@ -410,6 +400,17 @@
         break;
     }
   }
+  self->EndAssertNoThreadSuspension(old_cause);
+  // Do this after populating the shadow frame in case EnsureInitialized causes a GC.
+  if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    SirtRef<mirror::Class> sirt_c(self, method->GetDeclaringClass());
+    if (UNLIKELY(!class_linker->EnsureInitialized(sirt_c, true, true))) {
+      CHECK(self->IsExceptionPending());
+      self->PopShadowFrame();
+      return;
+    }
+  }
   if (LIKELY(!method->IsNative())) {
     JValue r = Execute(self, mh, code_item, *shadow_frame, JValue());
     if (result != NULL) {
@@ -418,6 +419,9 @@
   } else {
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
+    // Update args to be the args in the shadow frame since the input ones could hold stale
+    // references pointers due to moving GC.
+    args = shadow_frame->GetVRegArgs(method->IsStatic() ? 0 : 1);
     if (!Runtime::Current()->IsStarted()) {
       UnstartedRuntimeJni(self, method, receiver, args, result);
     } else {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8b93b91..4fe9169 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1005,6 +1005,15 @@
   }
 }
 
+void Thread::AssertNoPendingExceptionForNewException(const char* msg) const {
+  if (UNLIKELY(IsExceptionPending())) {
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::Throwable* exception = GetException(nullptr);
+    LOG(FATAL) << "Throwing new exception " << msg << " with unexpected pending exception: "
+        << exception->Dump();
+  }
+}
+
 static void MonitorExitVisitor(mirror::Object** object, void* arg, uint32_t /*thread_id*/,
                                RootType /*root_type*/)
     NO_THREAD_SAFETY_ANALYSIS {
@@ -1507,7 +1516,8 @@
 
 void Thread::ThrowNewException(const ThrowLocation& throw_location, const char* exception_class_descriptor,
                                const char* msg) {
-  AssertNoPendingException();  // Callers should either clear or call ThrowNewWrappedException.
+  // Callers should either clear or call ThrowNewWrappedException.
+  AssertNoPendingExceptionForNewException(msg);
   ThrowNewWrappedException(throw_location, exception_class_descriptor, msg);
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 9813130..f9d31af 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -285,6 +285,7 @@
   }
 
   void AssertNoPendingException() const;
+  void AssertNoPendingExceptionForNewException(const char* msg) const;
 
   void SetException(const ThrowLocation& throw_location, mirror::Throwable* new_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {